1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
6; CHECK-LABEL: test_mask_andnot_ps_rr_128:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
9; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
10  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
11  ret <4 x float> %res
12}
13
14define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
15; X86-LABEL: test_mask_andnot_ps_rrk_128:
16; X86:       # %bb.0:
17; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
18; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
19; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
20; X86-NEXT:    retl # encoding: [0xc3]
21;
22; X64-LABEL: test_mask_andnot_ps_rrk_128:
23; X64:       # %bb.0:
24; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
25; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
26; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
27; X64-NEXT:    retq # encoding: [0xc3]
28  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
29  ret <4 x float> %res
30}
31
32define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
33; X86-LABEL: test_mask_andnot_ps_rrkz_128:
34; X86:       # %bb.0:
35; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
36; X86-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
37; X86-NEXT:    retl # encoding: [0xc3]
38;
39; X64-LABEL: test_mask_andnot_ps_rrkz_128:
40; X64:       # %bb.0:
41; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
42; X64-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
43; X64-NEXT:    retq # encoding: [0xc3]
44  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
45  ret <4 x float> %res
46}
47
48define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
49; X86-LABEL: test_mask_andnot_ps_rm_128:
50; X86:       # %bb.0:
51; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
52; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x00]
53; X86-NEXT:    retl # encoding: [0xc3]
54;
55; X64-LABEL: test_mask_andnot_ps_rm_128:
56; X64:       # %bb.0:
57; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07]
58; X64-NEXT:    retq # encoding: [0xc3]
59  %b = load <4 x float>, <4 x float>* %ptr_b
60  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
61  ret <4 x float> %res
62}
63
64define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
65; X86-LABEL: test_mask_andnot_ps_rmk_128:
66; X86:       # %bb.0:
67; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
68; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
69; X86-NEXT:    vandnps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x08]
70; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
71; X86-NEXT:    retl # encoding: [0xc3]
72;
73; X64-LABEL: test_mask_andnot_ps_rmk_128:
74; X64:       # %bb.0:
75; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
76; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
77; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
78; X64-NEXT:    retq # encoding: [0xc3]
79  %b = load <4 x float>, <4 x float>* %ptr_b
80  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
81  ret <4 x float> %res
82}
83
84define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
85; X86-LABEL: test_mask_andnot_ps_rmkz_128:
86; X86:       # %bb.0:
87; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
88; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
89; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x00]
90; X86-NEXT:    retl # encoding: [0xc3]
91;
92; X64-LABEL: test_mask_andnot_ps_rmkz_128:
93; X64:       # %bb.0:
94; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
95; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
96; X64-NEXT:    retq # encoding: [0xc3]
97  %b = load <4 x float>, <4 x float>* %ptr_b
98  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
99  ret <4 x float> %res
100}
101
102define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
103; X86-LABEL: test_mask_andnot_ps_rmb_128:
104; X86:       # %bb.0:
105; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
106; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x00]
107; X86-NEXT:    retl # encoding: [0xc3]
108;
109; X64-LABEL: test_mask_andnot_ps_rmb_128:
110; X64:       # %bb.0:
111; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
112; X64-NEXT:    retq # encoding: [0xc3]
113  %q = load float, float* %ptr_b
114  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
115  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
116  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
117  ret <4 x float> %res
118}
119
120define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
121; X86-LABEL: test_mask_andnot_ps_rmbk_128:
122; X86:       # %bb.0:
123; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
124; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
125; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x08]
126; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
127; X86-NEXT:    retl # encoding: [0xc3]
128;
129; X64-LABEL: test_mask_andnot_ps_rmbk_128:
130; X64:       # %bb.0:
131; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
132; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
133; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
134; X64-NEXT:    retq # encoding: [0xc3]
135  %q = load float, float* %ptr_b
136  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
137  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
138  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
139  ret <4 x float> %res
140}
141
142define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
143; X86-LABEL: test_mask_andnot_ps_rmbkz_128:
144; X86:       # %bb.0:
145; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
146; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
147; X86-NEXT:    vandnps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x00]
148; X86-NEXT:    retl # encoding: [0xc3]
149;
150; X64-LABEL: test_mask_andnot_ps_rmbkz_128:
151; X64:       # %bb.0:
152; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
153; X64-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
154; X64-NEXT:    retq # encoding: [0xc3]
155  %q = load float, float* %ptr_b
156  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
157  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
158  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
159  ret <4 x float> %res
160}
161
162declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
163
164define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
165; CHECK-LABEL: test_mask_andnot_ps_rr_256:
166; CHECK:       # %bb.0:
167; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1]
168; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
169  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
170  ret <8 x float> %res
171}
172
173define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
174; X86-LABEL: test_mask_andnot_ps_rrk_256:
175; X86:       # %bb.0:
176; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
177; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
178; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
179; X86-NEXT:    retl # encoding: [0xc3]
180;
181; X64-LABEL: test_mask_andnot_ps_rrk_256:
182; X64:       # %bb.0:
183; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
184; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
185; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
186; X64-NEXT:    retq # encoding: [0xc3]
187  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
188  ret <8 x float> %res
189}
190
191define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
192; X86-LABEL: test_mask_andnot_ps_rrkz_256:
193; X86:       # %bb.0:
194; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
195; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
196; X86-NEXT:    retl # encoding: [0xc3]
197;
198; X64-LABEL: test_mask_andnot_ps_rrkz_256:
199; X64:       # %bb.0:
200; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
201; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
202; X64-NEXT:    retq # encoding: [0xc3]
203  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
204  ret <8 x float> %res
205}
206
207define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
208; X86-LABEL: test_mask_andnot_ps_rm_256:
209; X86:       # %bb.0:
210; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
211; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x00]
212; X86-NEXT:    retl # encoding: [0xc3]
213;
214; X64-LABEL: test_mask_andnot_ps_rm_256:
215; X64:       # %bb.0:
216; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07]
217; X64-NEXT:    retq # encoding: [0xc3]
218  %b = load <8 x float>, <8 x float>* %ptr_b
219  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
220  ret <8 x float> %res
221}
222
223define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
224; X86-LABEL: test_mask_andnot_ps_rmk_256:
225; X86:       # %bb.0:
226; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
227; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
228; X86-NEXT:    vandnps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x08]
229; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
230; X86-NEXT:    retl # encoding: [0xc3]
231;
232; X64-LABEL: test_mask_andnot_ps_rmk_256:
233; X64:       # %bb.0:
234; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
235; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
236; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
237; X64-NEXT:    retq # encoding: [0xc3]
238  %b = load <8 x float>, <8 x float>* %ptr_b
239  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
240  ret <8 x float> %res
241}
242
243define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
244; X86-LABEL: test_mask_andnot_ps_rmkz_256:
245; X86:       # %bb.0:
246; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
247; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
248; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x00]
249; X86-NEXT:    retl # encoding: [0xc3]
250;
251; X64-LABEL: test_mask_andnot_ps_rmkz_256:
252; X64:       # %bb.0:
253; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
254; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
255; X64-NEXT:    retq # encoding: [0xc3]
256  %b = load <8 x float>, <8 x float>* %ptr_b
257  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
258  ret <8 x float> %res
259}
260
261define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
262; X86-LABEL: test_mask_andnot_ps_rmb_256:
263; X86:       # %bb.0:
264; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
265; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x00]
266; X86-NEXT:    retl # encoding: [0xc3]
267;
268; X64-LABEL: test_mask_andnot_ps_rmb_256:
269; X64:       # %bb.0:
270; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
271; X64-NEXT:    retq # encoding: [0xc3]
272  %q = load float, float* %ptr_b
273  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
274  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
275  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
276  ret <8 x float> %res
277}
278
279define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
280; X86-LABEL: test_mask_andnot_ps_rmbk_256:
281; X86:       # %bb.0:
282; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
283; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
284; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x08]
285; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
286; X86-NEXT:    retl # encoding: [0xc3]
287;
288; X64-LABEL: test_mask_andnot_ps_rmbk_256:
289; X64:       # %bb.0:
290; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
291; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
292; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
293; X64-NEXT:    retq # encoding: [0xc3]
294  %q = load float, float* %ptr_b
295  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
296  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
297  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
298  ret <8 x float> %res
299}
300
301define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
302; X86-LABEL: test_mask_andnot_ps_rmbkz_256:
303; X86:       # %bb.0:
304; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
305; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
306; X86-NEXT:    vandnps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x00]
307; X86-NEXT:    retl # encoding: [0xc3]
308;
309; X64-LABEL: test_mask_andnot_ps_rmbkz_256:
310; X64:       # %bb.0:
311; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
312; X64-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
313; X64-NEXT:    retq # encoding: [0xc3]
314  %q = load float, float* %ptr_b
315  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
316  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
317  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
318  ret <8 x float> %res
319}
320
321declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
322
323define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
324; CHECK-LABEL: test_mask_andnot_ps_rr_512:
325; CHECK:       # %bb.0:
326; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
327; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
328  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
329  ret <16 x float> %res
330}
331
332define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
333; X86-LABEL: test_mask_andnot_ps_rrk_512:
334; X86:       # %bb.0:
335; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
336; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
337; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
338; X86-NEXT:    retl # encoding: [0xc3]
339;
340; X64-LABEL: test_mask_andnot_ps_rrk_512:
341; X64:       # %bb.0:
342; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
343; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
344; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
345; X64-NEXT:    retq # encoding: [0xc3]
346  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
347  ret <16 x float> %res
348}
349
350define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
351; X86-LABEL: test_mask_andnot_ps_rrkz_512:
352; X86:       # %bb.0:
353; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
354; X86-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
355; X86-NEXT:    retl # encoding: [0xc3]
356;
357; X64-LABEL: test_mask_andnot_ps_rrkz_512:
358; X64:       # %bb.0:
359; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
360; X64-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
361; X64-NEXT:    retq # encoding: [0xc3]
362  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
363  ret <16 x float> %res
364}
365
366define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
367; X86-LABEL: test_mask_andnot_ps_rm_512:
368; X86:       # %bb.0:
369; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
370; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x00]
371; X86-NEXT:    retl # encoding: [0xc3]
372;
373; X64-LABEL: test_mask_andnot_ps_rm_512:
374; X64:       # %bb.0:
375; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
376; X64-NEXT:    retq # encoding: [0xc3]
377  %b = load <16 x float>, <16 x float>* %ptr_b
378  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
379  ret <16 x float> %res
380}
381
382define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
383; X86-LABEL: test_mask_andnot_ps_rmk_512:
384; X86:       # %bb.0:
385; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
386; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
387; X86-NEXT:    vandnps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x08]
388; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
389; X86-NEXT:    retl # encoding: [0xc3]
390;
391; X64-LABEL: test_mask_andnot_ps_rmk_512:
392; X64:       # %bb.0:
393; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
394; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
395; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
396; X64-NEXT:    retq # encoding: [0xc3]
397  %b = load <16 x float>, <16 x float>* %ptr_b
398  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
399  ret <16 x float> %res
400}
401
402define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
403; X86-LABEL: test_mask_andnot_ps_rmkz_512:
404; X86:       # %bb.0:
405; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
406; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
407; X86-NEXT:    vandnps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x00]
408; X86-NEXT:    retl # encoding: [0xc3]
409;
410; X64-LABEL: test_mask_andnot_ps_rmkz_512:
411; X64:       # %bb.0:
412; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
413; X64-NEXT:    vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
414; X64-NEXT:    retq # encoding: [0xc3]
415  %b = load <16 x float>, <16 x float>* %ptr_b
416  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
417  ret <16 x float> %res
418}
419
420define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
421; X86-LABEL: test_mask_andnot_ps_rmb_512:
422; X86:       # %bb.0:
423; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
424; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x00]
425; X86-NEXT:    retl # encoding: [0xc3]
426;
427; X64-LABEL: test_mask_andnot_ps_rmb_512:
428; X64:       # %bb.0:
429; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
430; X64-NEXT:    retq # encoding: [0xc3]
431  %q = load float, float* %ptr_b
432  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
433  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
434  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
435  ret <16 x float> %res
436}
437
438define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
439; X86-LABEL: test_mask_andnot_ps_rmbk_512:
440; X86:       # %bb.0:
441; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
442; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
443; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x08]
444; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
445; X86-NEXT:    retl # encoding: [0xc3]
446;
447; X64-LABEL: test_mask_andnot_ps_rmbk_512:
448; X64:       # %bb.0:
449; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
450; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
451; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
452; X64-NEXT:    retq # encoding: [0xc3]
453  %q = load float, float* %ptr_b
454  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
455  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
456  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
457  ret <16 x float> %res
458}
459
460define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
461; X86-LABEL: test_mask_andnot_ps_rmbkz_512:
462; X86:       # %bb.0:
463; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
464; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
465; X86-NEXT:    vandnps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x00]
466; X86-NEXT:    retl # encoding: [0xc3]
467;
468; X64-LABEL: test_mask_andnot_ps_rmbkz_512:
469; X64:       # %bb.0:
470; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
471; X64-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
472; X64-NEXT:    retq # encoding: [0xc3]
473  %q = load float, float* %ptr_b
474  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
475  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
476  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
477  ret <16 x float> %res
478}
479
480declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
481
482define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
483; CHECK-LABEL: test_mask_and_ps_rr_128:
484; CHECK:       # %bb.0:
485; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
486; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
487  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
488  ret <4 x float> %res
489}
490
491define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
492; X86-LABEL: test_mask_and_ps_rrk_128:
493; X86:       # %bb.0:
494; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
495; X86-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
496; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
497; X86-NEXT:    retl # encoding: [0xc3]
498;
499; X64-LABEL: test_mask_and_ps_rrk_128:
500; X64:       # %bb.0:
501; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
502; X64-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
503; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
504; X64-NEXT:    retq # encoding: [0xc3]
505  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
506  ret <4 x float> %res
507}
508
509define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
510; X86-LABEL: test_mask_and_ps_rrkz_128:
511; X86:       # %bb.0:
512; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
513; X86-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
514; X86-NEXT:    retl # encoding: [0xc3]
515;
516; X64-LABEL: test_mask_and_ps_rrkz_128:
517; X64:       # %bb.0:
518; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
519; X64-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
520; X64-NEXT:    retq # encoding: [0xc3]
521  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
522  ret <4 x float> %res
523}
524
525define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
526; X86-LABEL: test_mask_and_ps_rm_128:
527; X86:       # %bb.0:
528; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
529; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x00]
530; X86-NEXT:    retl # encoding: [0xc3]
531;
532; X64-LABEL: test_mask_and_ps_rm_128:
533; X64:       # %bb.0:
534; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07]
535; X64-NEXT:    retq # encoding: [0xc3]
536  %b = load <4 x float>, <4 x float>* %ptr_b
537  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
538  ret <4 x float> %res
539}
540
541define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
542; X86-LABEL: test_mask_and_ps_rmk_128:
543; X86:       # %bb.0:
544; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
545; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
546; X86-NEXT:    vandps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x08]
547; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
548; X86-NEXT:    retl # encoding: [0xc3]
549;
550; X64-LABEL: test_mask_and_ps_rmk_128:
551; X64:       # %bb.0:
552; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
553; X64-NEXT:    vandps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
554; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
555; X64-NEXT:    retq # encoding: [0xc3]
556  %b = load <4 x float>, <4 x float>* %ptr_b
557  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
558  ret <4 x float> %res
559}
560
561define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
562; X86-LABEL: test_mask_and_ps_rmkz_128:
563; X86:       # %bb.0:
564; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
565; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
566; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x00]
567; X86-NEXT:    retl # encoding: [0xc3]
568;
569; X64-LABEL: test_mask_and_ps_rmkz_128:
570; X64:       # %bb.0:
571; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
572; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
573; X64-NEXT:    retq # encoding: [0xc3]
574  %b = load <4 x float>, <4 x float>* %ptr_b
575  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
576  ret <4 x float> %res
577}
578
579define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
580; X86-LABEL: test_mask_and_ps_rmb_128:
581; X86:       # %bb.0:
582; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
583; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x00]
584; X86-NEXT:    retl # encoding: [0xc3]
585;
586; X64-LABEL: test_mask_and_ps_rmb_128:
587; X64:       # %bb.0:
588; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
589; X64-NEXT:    retq # encoding: [0xc3]
590  %q = load float, float* %ptr_b
591  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
592  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
593  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
594  ret <4 x float> %res
595}
596
597define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
598; X86-LABEL: test_mask_and_ps_rmbk_128:
599; X86:       # %bb.0:
600; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
601; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
602; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x08]
603; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
604; X86-NEXT:    retl # encoding: [0xc3]
605;
606; X64-LABEL: test_mask_and_ps_rmbk_128:
607; X64:       # %bb.0:
608; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
609; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
610; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
611; X64-NEXT:    retq # encoding: [0xc3]
612  %q = load float, float* %ptr_b
613  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
614  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
615  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
616  ret <4 x float> %res
617}
618
619define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
620; X86-LABEL: test_mask_and_ps_rmbkz_128:
621; X86:       # %bb.0:
622; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
623; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
624; X86-NEXT:    vandps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x00]
625; X86-NEXT:    retl # encoding: [0xc3]
626;
627; X64-LABEL: test_mask_and_ps_rmbkz_128:
628; X64:       # %bb.0:
629; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
630; X64-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
631; X64-NEXT:    retq # encoding: [0xc3]
632  %q = load float, float* %ptr_b
633  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
634  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
635  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
636  ret <4 x float> %res
637}
638
639declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
640
641define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
642; CHECK-LABEL: test_mask_and_ps_rr_256:
643; CHECK:       # %bb.0:
644; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1]
645; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
646  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
647  ret <8 x float> %res
648}
649
650define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
651; X86-LABEL: test_mask_and_ps_rrk_256:
652; X86:       # %bb.0:
653; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
654; X86-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
655; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
656; X86-NEXT:    retl # encoding: [0xc3]
657;
658; X64-LABEL: test_mask_and_ps_rrk_256:
659; X64:       # %bb.0:
660; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
661; X64-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
662; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
663; X64-NEXT:    retq # encoding: [0xc3]
664  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
665  ret <8 x float> %res
666}
667
668define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
669; X86-LABEL: test_mask_and_ps_rrkz_256:
670; X86:       # %bb.0:
671; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
672; X86-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
673; X86-NEXT:    retl # encoding: [0xc3]
674;
675; X64-LABEL: test_mask_and_ps_rrkz_256:
676; X64:       # %bb.0:
677; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
678; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
679; X64-NEXT:    retq # encoding: [0xc3]
680  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
681  ret <8 x float> %res
682}
683
684define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
685; X86-LABEL: test_mask_and_ps_rm_256:
686; X86:       # %bb.0:
687; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
688; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x00]
689; X86-NEXT:    retl # encoding: [0xc3]
690;
691; X64-LABEL: test_mask_and_ps_rm_256:
692; X64:       # %bb.0:
693; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07]
694; X64-NEXT:    retq # encoding: [0xc3]
695  %b = load <8 x float>, <8 x float>* %ptr_b
696  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
697  ret <8 x float> %res
698}
699
700define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
701; X86-LABEL: test_mask_and_ps_rmk_256:
702; X86:       # %bb.0:
703; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
704; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
705; X86-NEXT:    vandps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x08]
706; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
707; X86-NEXT:    retl # encoding: [0xc3]
708;
709; X64-LABEL: test_mask_and_ps_rmk_256:
710; X64:       # %bb.0:
711; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
712; X64-NEXT:    vandps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
713; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
714; X64-NEXT:    retq # encoding: [0xc3]
715  %b = load <8 x float>, <8 x float>* %ptr_b
716  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
717  ret <8 x float> %res
718}
719
720define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
721; X86-LABEL: test_mask_and_ps_rmkz_256:
722; X86:       # %bb.0:
723; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
724; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
725; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x00]
726; X86-NEXT:    retl # encoding: [0xc3]
727;
728; X64-LABEL: test_mask_and_ps_rmkz_256:
729; X64:       # %bb.0:
730; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
731; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
732; X64-NEXT:    retq # encoding: [0xc3]
733  %b = load <8 x float>, <8 x float>* %ptr_b
734  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
735  ret <8 x float> %res
736}
737
738define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
739; X86-LABEL: test_mask_and_ps_rmb_256:
740; X86:       # %bb.0:
741; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
742; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x00]
743; X86-NEXT:    retl # encoding: [0xc3]
744;
745; X64-LABEL: test_mask_and_ps_rmb_256:
746; X64:       # %bb.0:
747; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
748; X64-NEXT:    retq # encoding: [0xc3]
749  %q = load float, float* %ptr_b
750  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
751  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
752  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
753  ret <8 x float> %res
754}
755
756define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
757; X86-LABEL: test_mask_and_ps_rmbk_256:
758; X86:       # %bb.0:
759; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
760; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
761; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x08]
762; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
763; X86-NEXT:    retl # encoding: [0xc3]
764;
765; X64-LABEL: test_mask_and_ps_rmbk_256:
766; X64:       # %bb.0:
767; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
768; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
769; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
770; X64-NEXT:    retq # encoding: [0xc3]
771  %q = load float, float* %ptr_b
772  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
773  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
774  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
775  ret <8 x float> %res
776}
777
778define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
779; X86-LABEL: test_mask_and_ps_rmbkz_256:
780; X86:       # %bb.0:
781; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
782; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
783; X86-NEXT:    vandps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x00]
784; X86-NEXT:    retl # encoding: [0xc3]
785;
786; X64-LABEL: test_mask_and_ps_rmbkz_256:
787; X64:       # %bb.0:
788; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
789; X64-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
790; X64-NEXT:    retq # encoding: [0xc3]
791  %q = load float, float* %ptr_b
792  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
793  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
794  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
795  ret <8 x float> %res
796}
797
798declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
799
800define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
801; CHECK-LABEL: test_mask_and_ps_rr_512:
802; CHECK:       # %bb.0:
803; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
804; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
805  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
806  ret <16 x float> %res
807}
808
809define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
810; X86-LABEL: test_mask_and_ps_rrk_512:
811; X86:       # %bb.0:
812; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
813; X86-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
814; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
815; X86-NEXT:    retl # encoding: [0xc3]
816;
817; X64-LABEL: test_mask_and_ps_rrk_512:
818; X64:       # %bb.0:
819; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
820; X64-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
821; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
822; X64-NEXT:    retq # encoding: [0xc3]
823  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
824  ret <16 x float> %res
825}
826
827define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
828; X86-LABEL: test_mask_and_ps_rrkz_512:
829; X86:       # %bb.0:
830; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
831; X86-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
832; X86-NEXT:    retl # encoding: [0xc3]
833;
834; X64-LABEL: test_mask_and_ps_rrkz_512:
835; X64:       # %bb.0:
836; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
837; X64-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
838; X64-NEXT:    retq # encoding: [0xc3]
839  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
840  ret <16 x float> %res
841}
842
843define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
844; X86-LABEL: test_mask_and_ps_rm_512:
845; X86:       # %bb.0:
846; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
847; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x00]
848; X86-NEXT:    retl # encoding: [0xc3]
849;
850; X64-LABEL: test_mask_and_ps_rm_512:
851; X64:       # %bb.0:
852; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
853; X64-NEXT:    retq # encoding: [0xc3]
854  %b = load <16 x float>, <16 x float>* %ptr_b
855  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
856  ret <16 x float> %res
857}
858
859define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
860; X86-LABEL: test_mask_and_ps_rmk_512:
861; X86:       # %bb.0:
862; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
863; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
864; X86-NEXT:    vandps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x08]
865; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
866; X86-NEXT:    retl # encoding: [0xc3]
867;
868; X64-LABEL: test_mask_and_ps_rmk_512:
869; X64:       # %bb.0:
870; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
871; X64-NEXT:    vandps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
872; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
873; X64-NEXT:    retq # encoding: [0xc3]
874  %b = load <16 x float>, <16 x float>* %ptr_b
875  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
876  ret <16 x float> %res
877}
878
879define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
880; X86-LABEL: test_mask_and_ps_rmkz_512:
881; X86:       # %bb.0:
882; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
883; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
884; X86-NEXT:    vandps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x00]
885; X86-NEXT:    retl # encoding: [0xc3]
886;
887; X64-LABEL: test_mask_and_ps_rmkz_512:
888; X64:       # %bb.0:
889; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
890; X64-NEXT:    vandps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
891; X64-NEXT:    retq # encoding: [0xc3]
892  %b = load <16 x float>, <16 x float>* %ptr_b
893  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
894  ret <16 x float> %res
895}
896
897define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
898; X86-LABEL: test_mask_and_ps_rmb_512:
899; X86:       # %bb.0:
900; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
901; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x00]
902; X86-NEXT:    retl # encoding: [0xc3]
903;
904; X64-LABEL: test_mask_and_ps_rmb_512:
905; X64:       # %bb.0:
906; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
907; X64-NEXT:    retq # encoding: [0xc3]
908  %q = load float, float* %ptr_b
909  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
910  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
911  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
912  ret <16 x float> %res
913}
914
915define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
916; X86-LABEL: test_mask_and_ps_rmbk_512:
917; X86:       # %bb.0:
918; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
919; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
920; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x08]
921; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
922; X86-NEXT:    retl # encoding: [0xc3]
923;
924; X64-LABEL: test_mask_and_ps_rmbk_512:
925; X64:       # %bb.0:
926; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
927; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
928; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
929; X64-NEXT:    retq # encoding: [0xc3]
930  %q = load float, float* %ptr_b
931  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
932  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
933  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
934  ret <16 x float> %res
935}
936
937define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
938; X86-LABEL: test_mask_and_ps_rmbkz_512:
939; X86:       # %bb.0:
940; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
941; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
942; X86-NEXT:    vandps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x00]
943; X86-NEXT:    retl # encoding: [0xc3]
944;
945; X64-LABEL: test_mask_and_ps_rmbkz_512:
946; X64:       # %bb.0:
947; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
948; X64-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
949; X64-NEXT:    retq # encoding: [0xc3]
950  %q = load float, float* %ptr_b
951  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
952  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
953  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
954  ret <16 x float> %res
955}
956
957declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
958
959define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
960; CHECK-LABEL: test_mask_or_ps_rr_128:
961; CHECK:       # %bb.0:
962; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
963; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
964  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
965  ret <4 x float> %res
966}
967
968define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
969; X86-LABEL: test_mask_or_ps_rrk_128:
970; X86:       # %bb.0:
971; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
972; X86-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
973; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
974; X86-NEXT:    retl # encoding: [0xc3]
975;
976; X64-LABEL: test_mask_or_ps_rrk_128:
977; X64:       # %bb.0:
978; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
979; X64-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
980; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
981; X64-NEXT:    retq # encoding: [0xc3]
982  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
983  ret <4 x float> %res
984}
985
986define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
987; X86-LABEL: test_mask_or_ps_rrkz_128:
988; X86:       # %bb.0:
989; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
990; X86-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
991; X86-NEXT:    retl # encoding: [0xc3]
992;
993; X64-LABEL: test_mask_or_ps_rrkz_128:
994; X64:       # %bb.0:
995; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
996; X64-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
997; X64-NEXT:    retq # encoding: [0xc3]
998  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
999  ret <4 x float> %res
1000}
1001
1002define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
1003; X86-LABEL: test_mask_or_ps_rm_128:
1004; X86:       # %bb.0:
1005; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1006; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x00]
1007; X86-NEXT:    retl # encoding: [0xc3]
1008;
1009; X64-LABEL: test_mask_or_ps_rm_128:
1010; X64:       # %bb.0:
1011; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07]
1012; X64-NEXT:    retq # encoding: [0xc3]
1013  %b = load <4 x float>, <4 x float>* %ptr_b
1014  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1015  ret <4 x float> %res
1016}
1017
1018define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
1019; X86-LABEL: test_mask_or_ps_rmk_128:
1020; X86:       # %bb.0:
1021; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1022; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1023; X86-NEXT:    vorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x08]
1024; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1025; X86-NEXT:    retl # encoding: [0xc3]
1026;
1027; X64-LABEL: test_mask_or_ps_rmk_128:
1028; X64:       # %bb.0:
1029; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1030; X64-NEXT:    vorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
1031; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1032; X64-NEXT:    retq # encoding: [0xc3]
1033  %b = load <4 x float>, <4 x float>* %ptr_b
1034  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1035  ret <4 x float> %res
1036}
1037
1038define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
1039; X86-LABEL: test_mask_or_ps_rmkz_128:
1040; X86:       # %bb.0:
1041; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1042; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1043; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x00]
1044; X86-NEXT:    retl # encoding: [0xc3]
1045;
1046; X64-LABEL: test_mask_or_ps_rmkz_128:
1047; X64:       # %bb.0:
1048; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1049; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
1050; X64-NEXT:    retq # encoding: [0xc3]
1051  %b = load <4 x float>, <4 x float>* %ptr_b
1052  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1053  ret <4 x float> %res
1054}
1055
1056define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
1057; X86-LABEL: test_mask_or_ps_rmb_128:
1058; X86:       # %bb.0:
1059; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1060; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x00]
1061; X86-NEXT:    retl # encoding: [0xc3]
1062;
1063; X64-LABEL: test_mask_or_ps_rmb_128:
1064; X64:       # %bb.0:
1065; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
1066; X64-NEXT:    retq # encoding: [0xc3]
1067  %q = load float, float* %ptr_b
1068  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1069  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1070  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1071  ret <4 x float> %res
1072}
1073
1074define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
1075; X86-LABEL: test_mask_or_ps_rmbk_128:
1076; X86:       # %bb.0:
1077; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1078; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1079; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x08]
1080; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1081; X86-NEXT:    retl # encoding: [0xc3]
1082;
1083; X64-LABEL: test_mask_or_ps_rmbk_128:
1084; X64:       # %bb.0:
1085; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1086; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
1087; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1088; X64-NEXT:    retq # encoding: [0xc3]
1089  %q = load float, float* %ptr_b
1090  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1091  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1092  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1093  ret <4 x float> %res
1094}
1095
1096define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
1097; X86-LABEL: test_mask_or_ps_rmbkz_128:
1098; X86:       # %bb.0:
1099; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1100; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1101; X86-NEXT:    vorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x00]
1102; X86-NEXT:    retl # encoding: [0xc3]
1103;
1104; X64-LABEL: test_mask_or_ps_rmbkz_128:
1105; X64:       # %bb.0:
1106; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1107; X64-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
1108; X64-NEXT:    retq # encoding: [0xc3]
1109  %q = load float, float* %ptr_b
1110  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1111  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1112  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1113  ret <4 x float> %res
1114}
1115
1116declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1117
1118define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1119; CHECK-LABEL: test_mask_or_ps_rr_256:
1120; CHECK:       # %bb.0:
1121; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1]
1122; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1123  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1124  ret <8 x float> %res
1125}
1126
1127define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1128; X86-LABEL: test_mask_or_ps_rrk_256:
1129; X86:       # %bb.0:
1130; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1131; X86-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
1132; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1133; X86-NEXT:    retl # encoding: [0xc3]
1134;
1135; X64-LABEL: test_mask_or_ps_rrk_256:
1136; X64:       # %bb.0:
1137; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1138; X64-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
1139; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1140; X64-NEXT:    retq # encoding: [0xc3]
1141  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1142  ret <8 x float> %res
1143}
1144
1145define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1146; X86-LABEL: test_mask_or_ps_rrkz_256:
1147; X86:       # %bb.0:
1148; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1149; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
1150; X86-NEXT:    retl # encoding: [0xc3]
1151;
1152; X64-LABEL: test_mask_or_ps_rrkz_256:
1153; X64:       # %bb.0:
1154; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1155; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
1156; X64-NEXT:    retq # encoding: [0xc3]
1157  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1158  ret <8 x float> %res
1159}
1160
1161define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1162; X86-LABEL: test_mask_or_ps_rm_256:
1163; X86:       # %bb.0:
1164; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1165; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x00]
1166; X86-NEXT:    retl # encoding: [0xc3]
1167;
1168; X64-LABEL: test_mask_or_ps_rm_256:
1169; X64:       # %bb.0:
1170; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07]
1171; X64-NEXT:    retq # encoding: [0xc3]
1172  %b = load <8 x float>, <8 x float>* %ptr_b
1173  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1174  ret <8 x float> %res
1175}
1176
1177define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1178; X86-LABEL: test_mask_or_ps_rmk_256:
1179; X86:       # %bb.0:
1180; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1181; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1182; X86-NEXT:    vorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x08]
1183; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1184; X86-NEXT:    retl # encoding: [0xc3]
1185;
1186; X64-LABEL: test_mask_or_ps_rmk_256:
1187; X64:       # %bb.0:
1188; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1189; X64-NEXT:    vorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
1190; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1191; X64-NEXT:    retq # encoding: [0xc3]
1192  %b = load <8 x float>, <8 x float>* %ptr_b
1193  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1194  ret <8 x float> %res
1195}
1196
1197define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1198; X86-LABEL: test_mask_or_ps_rmkz_256:
1199; X86:       # %bb.0:
1200; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1201; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1202; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x00]
1203; X86-NEXT:    retl # encoding: [0xc3]
1204;
1205; X64-LABEL: test_mask_or_ps_rmkz_256:
1206; X64:       # %bb.0:
1207; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1208; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
1209; X64-NEXT:    retq # encoding: [0xc3]
1210  %b = load <8 x float>, <8 x float>* %ptr_b
1211  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1212  ret <8 x float> %res
1213}
1214
1215define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1216; X86-LABEL: test_mask_or_ps_rmb_256:
1217; X86:       # %bb.0:
1218; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1219; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x00]
1220; X86-NEXT:    retl # encoding: [0xc3]
1221;
1222; X64-LABEL: test_mask_or_ps_rmb_256:
1223; X64:       # %bb.0:
1224; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
1225; X64-NEXT:    retq # encoding: [0xc3]
1226  %q = load float, float* %ptr_b
1227  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1228  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1229  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1230  ret <8 x float> %res
1231}
1232
1233define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1234; X86-LABEL: test_mask_or_ps_rmbk_256:
1235; X86:       # %bb.0:
1236; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1237; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1238; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x08]
1239; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1240; X86-NEXT:    retl # encoding: [0xc3]
1241;
1242; X64-LABEL: test_mask_or_ps_rmbk_256:
1243; X64:       # %bb.0:
1244; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1245; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
1246; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1247; X64-NEXT:    retq # encoding: [0xc3]
1248  %q = load float, float* %ptr_b
1249  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1250  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1251  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1252  ret <8 x float> %res
1253}
1254
1255define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1256; X86-LABEL: test_mask_or_ps_rmbkz_256:
1257; X86:       # %bb.0:
1258; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1259; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1260; X86-NEXT:    vorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x00]
1261; X86-NEXT:    retl # encoding: [0xc3]
1262;
1263; X64-LABEL: test_mask_or_ps_rmbkz_256:
1264; X64:       # %bb.0:
1265; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1266; X64-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
1267; X64-NEXT:    retq # encoding: [0xc3]
1268  %q = load float, float* %ptr_b
1269  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1270  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1271  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1272  ret <8 x float> %res
1273}
1274
1275declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1276
1277define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1278; CHECK-LABEL: test_mask_or_ps_rr_512:
1279; CHECK:       # %bb.0:
1280; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
1281; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1282  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1283  ret <16 x float> %res
1284}
1285
1286define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1287; X86-LABEL: test_mask_or_ps_rrk_512:
1288; X86:       # %bb.0:
1289; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1290; X86-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
1291; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1292; X86-NEXT:    retl # encoding: [0xc3]
1293;
1294; X64-LABEL: test_mask_or_ps_rrk_512:
1295; X64:       # %bb.0:
1296; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1297; X64-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
1298; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1299; X64-NEXT:    retq # encoding: [0xc3]
1300  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1301  ret <16 x float> %res
1302}
1303
1304define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1305; X86-LABEL: test_mask_or_ps_rrkz_512:
1306; X86:       # %bb.0:
1307; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1308; X86-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
1309; X86-NEXT:    retl # encoding: [0xc3]
1310;
1311; X64-LABEL: test_mask_or_ps_rrkz_512:
1312; X64:       # %bb.0:
1313; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1314; X64-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
1315; X64-NEXT:    retq # encoding: [0xc3]
1316  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1317  ret <16 x float> %res
1318}
1319
1320define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1321; X86-LABEL: test_mask_or_ps_rm_512:
1322; X86:       # %bb.0:
1323; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1324; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x00]
1325; X86-NEXT:    retl # encoding: [0xc3]
1326;
1327; X64-LABEL: test_mask_or_ps_rm_512:
1328; X64:       # %bb.0:
1329; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
1330; X64-NEXT:    retq # encoding: [0xc3]
1331  %b = load <16 x float>, <16 x float>* %ptr_b
1332  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1333  ret <16 x float> %res
1334}
1335
1336define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1337; X86-LABEL: test_mask_or_ps_rmk_512:
1338; X86:       # %bb.0:
1339; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1340; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1341; X86-NEXT:    vorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x08]
1342; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1343; X86-NEXT:    retl # encoding: [0xc3]
1344;
1345; X64-LABEL: test_mask_or_ps_rmk_512:
1346; X64:       # %bb.0:
1347; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1348; X64-NEXT:    vorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
1349; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1350; X64-NEXT:    retq # encoding: [0xc3]
1351  %b = load <16 x float>, <16 x float>* %ptr_b
1352  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1353  ret <16 x float> %res
1354}
1355
1356define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1357; X86-LABEL: test_mask_or_ps_rmkz_512:
1358; X86:       # %bb.0:
1359; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1360; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1361; X86-NEXT:    vorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x00]
1362; X86-NEXT:    retl # encoding: [0xc3]
1363;
1364; X64-LABEL: test_mask_or_ps_rmkz_512:
1365; X64:       # %bb.0:
1366; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1367; X64-NEXT:    vorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
1368; X64-NEXT:    retq # encoding: [0xc3]
1369  %b = load <16 x float>, <16 x float>* %ptr_b
1370  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1371  ret <16 x float> %res
1372}
1373
1374define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1375; X86-LABEL: test_mask_or_ps_rmb_512:
1376; X86:       # %bb.0:
1377; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1378; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x00]
1379; X86-NEXT:    retl # encoding: [0xc3]
1380;
1381; X64-LABEL: test_mask_or_ps_rmb_512:
1382; X64:       # %bb.0:
1383; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
1384; X64-NEXT:    retq # encoding: [0xc3]
1385  %q = load float, float* %ptr_b
1386  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1387  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1388  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1389  ret <16 x float> %res
1390}
1391
1392define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1393; X86-LABEL: test_mask_or_ps_rmbk_512:
1394; X86:       # %bb.0:
1395; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1396; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1397; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x08]
1398; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1399; X86-NEXT:    retl # encoding: [0xc3]
1400;
1401; X64-LABEL: test_mask_or_ps_rmbk_512:
1402; X64:       # %bb.0:
1403; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1404; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
1405; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1406; X64-NEXT:    retq # encoding: [0xc3]
1407  %q = load float, float* %ptr_b
1408  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1409  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1410  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1411  ret <16 x float> %res
1412}
1413
1414define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1415; X86-LABEL: test_mask_or_ps_rmbkz_512:
1416; X86:       # %bb.0:
1417; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1418; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1419; X86-NEXT:    vorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x00]
1420; X86-NEXT:    retl # encoding: [0xc3]
1421;
1422; X64-LABEL: test_mask_or_ps_rmbkz_512:
1423; X64:       # %bb.0:
1424; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1425; X64-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
1426; X64-NEXT:    retq # encoding: [0xc3]
1427  %q = load float, float* %ptr_b
1428  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1429  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1430  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1431  ret <16 x float> %res
1432}
1433
1434declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1435
1436define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
1437; CHECK-LABEL: test_mask_xor_ps_rr_128:
1438; CHECK:       # %bb.0:
1439; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
1440; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1441  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1442  ret <4 x float> %res
1443}
1444
1445define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
1446; X86-LABEL: test_mask_xor_ps_rrk_128:
1447; X86:       # %bb.0:
1448; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1449; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
1450; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1451; X86-NEXT:    retl # encoding: [0xc3]
1452;
1453; X64-LABEL: test_mask_xor_ps_rrk_128:
1454; X64:       # %bb.0:
1455; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1456; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
1457; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1458; X64-NEXT:    retq # encoding: [0xc3]
1459  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1460  ret <4 x float> %res
1461}
1462
1463define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
1464; X86-LABEL: test_mask_xor_ps_rrkz_128:
1465; X86:       # %bb.0:
1466; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1467; X86-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
1468; X86-NEXT:    retl # encoding: [0xc3]
1469;
1470; X64-LABEL: test_mask_xor_ps_rrkz_128:
1471; X64:       # %bb.0:
1472; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1473; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
1474; X64-NEXT:    retq # encoding: [0xc3]
1475  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1476  ret <4 x float> %res
1477}
1478
1479define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
1480; X86-LABEL: test_mask_xor_ps_rm_128:
1481; X86:       # %bb.0:
1482; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1483; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x00]
1484; X86-NEXT:    retl # encoding: [0xc3]
1485;
1486; X64-LABEL: test_mask_xor_ps_rm_128:
1487; X64:       # %bb.0:
1488; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07]
1489; X64-NEXT:    retq # encoding: [0xc3]
1490  %b = load <4 x float>, <4 x float>* %ptr_b
1491  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1492  ret <4 x float> %res
1493}
1494
1495define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
1496; X86-LABEL: test_mask_xor_ps_rmk_128:
1497; X86:       # %bb.0:
1498; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1499; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1500; X86-NEXT:    vxorps (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x08]
1501; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1502; X86-NEXT:    retl # encoding: [0xc3]
1503;
1504; X64-LABEL: test_mask_xor_ps_rmk_128:
1505; X64:       # %bb.0:
1506; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1507; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
1508; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1509; X64-NEXT:    retq # encoding: [0xc3]
1510  %b = load <4 x float>, <4 x float>* %ptr_b
1511  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1512  ret <4 x float> %res
1513}
1514
1515define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
1516; X86-LABEL: test_mask_xor_ps_rmkz_128:
1517; X86:       # %bb.0:
1518; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1519; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1520; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x00]
1521; X86-NEXT:    retl # encoding: [0xc3]
1522;
1523; X64-LABEL: test_mask_xor_ps_rmkz_128:
1524; X64:       # %bb.0:
1525; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1526; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
1527; X64-NEXT:    retq # encoding: [0xc3]
1528  %b = load <4 x float>, <4 x float>* %ptr_b
1529  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1530  ret <4 x float> %res
1531}
1532
1533define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
1534; X86-LABEL: test_mask_xor_ps_rmb_128:
1535; X86:       # %bb.0:
1536; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1537; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x00]
1538; X86-NEXT:    retl # encoding: [0xc3]
1539;
1540; X64-LABEL: test_mask_xor_ps_rmb_128:
1541; X64:       # %bb.0:
1542; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
1543; X64-NEXT:    retq # encoding: [0xc3]
1544  %q = load float, float* %ptr_b
1545  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1546  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1547  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1548  ret <4 x float> %res
1549}
1550
1551define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
1552; X86-LABEL: test_mask_xor_ps_rmbk_128:
1553; X86:       # %bb.0:
1554; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1555; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1556; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x08]
1557; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1558; X86-NEXT:    retl # encoding: [0xc3]
1559;
1560; X64-LABEL: test_mask_xor_ps_rmbk_128:
1561; X64:       # %bb.0:
1562; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1563; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
1564; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1565; X64-NEXT:    retq # encoding: [0xc3]
1566  %q = load float, float* %ptr_b
1567  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1568  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1569  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1570  ret <4 x float> %res
1571}
1572
1573define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
1574; X86-LABEL: test_mask_xor_ps_rmbkz_128:
1575; X86:       # %bb.0:
1576; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1577; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1578; X86-NEXT:    vxorps (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x00]
1579; X86-NEXT:    retl # encoding: [0xc3]
1580;
1581; X64-LABEL: test_mask_xor_ps_rmbkz_128:
1582; X64:       # %bb.0:
1583; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1584; X64-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
1585; X64-NEXT:    retq # encoding: [0xc3]
1586  %q = load float, float* %ptr_b
1587  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1588  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1589  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1590  ret <4 x float> %res
1591}
1592
1593declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1594
1595define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1596; CHECK-LABEL: test_mask_xor_ps_rr_256:
1597; CHECK:       # %bb.0:
1598; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1]
1599; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1600  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1601  ret <8 x float> %res
1602}
1603
1604define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1605; X86-LABEL: test_mask_xor_ps_rrk_256:
1606; X86:       # %bb.0:
1607; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1608; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
1609; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1610; X86-NEXT:    retl # encoding: [0xc3]
1611;
1612; X64-LABEL: test_mask_xor_ps_rrk_256:
1613; X64:       # %bb.0:
1614; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1615; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
1616; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1617; X64-NEXT:    retq # encoding: [0xc3]
1618  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1619  ret <8 x float> %res
1620}
1621
1622define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1623; X86-LABEL: test_mask_xor_ps_rrkz_256:
1624; X86:       # %bb.0:
1625; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1626; X86-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
1627; X86-NEXT:    retl # encoding: [0xc3]
1628;
1629; X64-LABEL: test_mask_xor_ps_rrkz_256:
1630; X64:       # %bb.0:
1631; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1632; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
1633; X64-NEXT:    retq # encoding: [0xc3]
1634  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1635  ret <8 x float> %res
1636}
1637
1638define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1639; X86-LABEL: test_mask_xor_ps_rm_256:
1640; X86:       # %bb.0:
1641; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1642; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x00]
1643; X86-NEXT:    retl # encoding: [0xc3]
1644;
1645; X64-LABEL: test_mask_xor_ps_rm_256:
1646; X64:       # %bb.0:
1647; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07]
1648; X64-NEXT:    retq # encoding: [0xc3]
1649  %b = load <8 x float>, <8 x float>* %ptr_b
1650  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1651  ret <8 x float> %res
1652}
1653
1654define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1655; X86-LABEL: test_mask_xor_ps_rmk_256:
1656; X86:       # %bb.0:
1657; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1658; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1659; X86-NEXT:    vxorps (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x08]
1660; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1661; X86-NEXT:    retl # encoding: [0xc3]
1662;
1663; X64-LABEL: test_mask_xor_ps_rmk_256:
1664; X64:       # %bb.0:
1665; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1666; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
1667; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1668; X64-NEXT:    retq # encoding: [0xc3]
1669  %b = load <8 x float>, <8 x float>* %ptr_b
1670  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1671  ret <8 x float> %res
1672}
1673
1674define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1675; X86-LABEL: test_mask_xor_ps_rmkz_256:
1676; X86:       # %bb.0:
1677; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1678; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1679; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x00]
1680; X86-NEXT:    retl # encoding: [0xc3]
1681;
1682; X64-LABEL: test_mask_xor_ps_rmkz_256:
1683; X64:       # %bb.0:
1684; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1685; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
1686; X64-NEXT:    retq # encoding: [0xc3]
1687  %b = load <8 x float>, <8 x float>* %ptr_b
1688  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1689  ret <8 x float> %res
1690}
1691
1692define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1693; X86-LABEL: test_mask_xor_ps_rmb_256:
1694; X86:       # %bb.0:
1695; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1696; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x00]
1697; X86-NEXT:    retl # encoding: [0xc3]
1698;
1699; X64-LABEL: test_mask_xor_ps_rmb_256:
1700; X64:       # %bb.0:
1701; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
1702; X64-NEXT:    retq # encoding: [0xc3]
1703  %q = load float, float* %ptr_b
1704  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1705  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1706  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1707  ret <8 x float> %res
1708}
1709
1710define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1711; X86-LABEL: test_mask_xor_ps_rmbk_256:
1712; X86:       # %bb.0:
1713; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1714; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1715; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x08]
1716; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1717; X86-NEXT:    retl # encoding: [0xc3]
1718;
1719; X64-LABEL: test_mask_xor_ps_rmbk_256:
1720; X64:       # %bb.0:
1721; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1722; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
1723; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1724; X64-NEXT:    retq # encoding: [0xc3]
1725  %q = load float, float* %ptr_b
1726  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1727  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1728  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1729  ret <8 x float> %res
1730}
1731
1732define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1733; X86-LABEL: test_mask_xor_ps_rmbkz_256:
1734; X86:       # %bb.0:
1735; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1736; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1737; X86-NEXT:    vxorps (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x00]
1738; X86-NEXT:    retl # encoding: [0xc3]
1739;
1740; X64-LABEL: test_mask_xor_ps_rmbkz_256:
1741; X64:       # %bb.0:
1742; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1743; X64-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
1744; X64-NEXT:    retq # encoding: [0xc3]
1745  %q = load float, float* %ptr_b
1746  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1747  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1748  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1749  ret <8 x float> %res
1750}
1751
1752declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1753
1754define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1755; CHECK-LABEL: test_mask_xor_ps_rr_512:
1756; CHECK:       # %bb.0:
1757; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
1758; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1759  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1760  ret <16 x float> %res
1761}
1762
1763define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1764; X86-LABEL: test_mask_xor_ps_rrk_512:
1765; X86:       # %bb.0:
1766; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1767; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
1768; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1769; X86-NEXT:    retl # encoding: [0xc3]
1770;
1771; X64-LABEL: test_mask_xor_ps_rrk_512:
1772; X64:       # %bb.0:
1773; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1774; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
1775; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1776; X64-NEXT:    retq # encoding: [0xc3]
1777  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1778  ret <16 x float> %res
1779}
1780
1781define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1782; X86-LABEL: test_mask_xor_ps_rrkz_512:
1783; X86:       # %bb.0:
1784; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1785; X86-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
1786; X86-NEXT:    retl # encoding: [0xc3]
1787;
1788; X64-LABEL: test_mask_xor_ps_rrkz_512:
1789; X64:       # %bb.0:
1790; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1791; X64-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
1792; X64-NEXT:    retq # encoding: [0xc3]
1793  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1794  ret <16 x float> %res
1795}
1796
1797define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1798; X86-LABEL: test_mask_xor_ps_rm_512:
1799; X86:       # %bb.0:
1800; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1801; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x00]
1802; X86-NEXT:    retl # encoding: [0xc3]
1803;
1804; X64-LABEL: test_mask_xor_ps_rm_512:
1805; X64:       # %bb.0:
1806; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
1807; X64-NEXT:    retq # encoding: [0xc3]
1808  %b = load <16 x float>, <16 x float>* %ptr_b
1809  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1810  ret <16 x float> %res
1811}
1812
1813define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1814; X86-LABEL: test_mask_xor_ps_rmk_512:
1815; X86:       # %bb.0:
1816; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1817; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1818; X86-NEXT:    vxorps (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x08]
1819; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1820; X86-NEXT:    retl # encoding: [0xc3]
1821;
1822; X64-LABEL: test_mask_xor_ps_rmk_512:
1823; X64:       # %bb.0:
1824; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1825; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
1826; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1827; X64-NEXT:    retq # encoding: [0xc3]
1828  %b = load <16 x float>, <16 x float>* %ptr_b
1829  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1830  ret <16 x float> %res
1831}
1832
1833define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1834; X86-LABEL: test_mask_xor_ps_rmkz_512:
1835; X86:       # %bb.0:
1836; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1837; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1838; X86-NEXT:    vxorps (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x00]
1839; X86-NEXT:    retl # encoding: [0xc3]
1840;
1841; X64-LABEL: test_mask_xor_ps_rmkz_512:
1842; X64:       # %bb.0:
1843; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1844; X64-NEXT:    vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
1845; X64-NEXT:    retq # encoding: [0xc3]
1846  %b = load <16 x float>, <16 x float>* %ptr_b
1847  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1848  ret <16 x float> %res
1849}
1850
1851define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1852; X86-LABEL: test_mask_xor_ps_rmb_512:
1853; X86:       # %bb.0:
1854; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1855; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x00]
1856; X86-NEXT:    retl # encoding: [0xc3]
1857;
1858; X64-LABEL: test_mask_xor_ps_rmb_512:
1859; X64:       # %bb.0:
1860; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
1861; X64-NEXT:    retq # encoding: [0xc3]
1862  %q = load float, float* %ptr_b
1863  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1864  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1865  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1866  ret <16 x float> %res
1867}
1868
1869define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1870; X86-LABEL: test_mask_xor_ps_rmbk_512:
1871; X86:       # %bb.0:
1872; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1873; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1874; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x08]
1875; X86-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1876; X86-NEXT:    retl # encoding: [0xc3]
1877;
1878; X64-LABEL: test_mask_xor_ps_rmbk_512:
1879; X64:       # %bb.0:
1880; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1881; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
1882; X64-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1883; X64-NEXT:    retq # encoding: [0xc3]
1884  %q = load float, float* %ptr_b
1885  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1886  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1887  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1888  ret <16 x float> %res
1889}
1890
1891define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1892; X86-LABEL: test_mask_xor_ps_rmbkz_512:
1893; X86:       # %bb.0:
1894; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1895; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1896; X86-NEXT:    vxorps (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x00]
1897; X86-NEXT:    retl # encoding: [0xc3]
1898;
1899; X64-LABEL: test_mask_xor_ps_rmbkz_512:
1900; X64:       # %bb.0:
1901; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1902; X64-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
1903; X64-NEXT:    retq # encoding: [0xc3]
1904  %q = load float, float* %ptr_b
1905  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1906  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1907  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1908  ret <16 x float> %res
1909}
1910
1911declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1912
1913define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
1914; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
1915; CHECK:       # %bb.0:
1916; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
1917; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1918  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1919  ret <8 x i64> %res
1920}
1921
1922define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1923; X86-LABEL: test_mask_mullo_epi64_rrk_512:
1924; X86:       # %bb.0:
1925; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1926; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
1927; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1928; X86-NEXT:    retl # encoding: [0xc3]
1929;
1930; X64-LABEL: test_mask_mullo_epi64_rrk_512:
1931; X64:       # %bb.0:
1932; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1933; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
1934; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1935; X64-NEXT:    retq # encoding: [0xc3]
1936  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1937  ret <8 x i64> %res
1938}
1939
1940define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1941; X86-LABEL: test_mask_mullo_epi64_rrkz_512:
1942; X86:       # %bb.0:
1943; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1944; X86-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
1945; X86-NEXT:    retl # encoding: [0xc3]
1946;
1947; X64-LABEL: test_mask_mullo_epi64_rrkz_512:
1948; X64:       # %bb.0:
1949; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1950; X64-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
1951; X64-NEXT:    retq # encoding: [0xc3]
1952  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1953  ret <8 x i64> %res
1954}
1955
1956define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
1957; X86-LABEL: test_mask_mullo_epi64_rm_512:
1958; X86:       # %bb.0:
1959; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1960; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x00]
1961; X86-NEXT:    retl # encoding: [0xc3]
1962;
1963; X64-LABEL: test_mask_mullo_epi64_rm_512:
1964; X64:       # %bb.0:
1965; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
1966; X64-NEXT:    retq # encoding: [0xc3]
1967  %b = load <8 x i64>, <8 x i64>* %ptr_b
1968  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1969  ret <8 x i64> %res
1970}
1971
1972define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1973; X86-LABEL: test_mask_mullo_epi64_rmk_512:
1974; X86:       # %bb.0:
1975; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1976; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1977; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x08]
1978; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1979; X86-NEXT:    retl # encoding: [0xc3]
1980;
1981; X64-LABEL: test_mask_mullo_epi64_rmk_512:
1982; X64:       # %bb.0:
1983; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1984; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
1985; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1986; X64-NEXT:    retq # encoding: [0xc3]
1987  %b = load <8 x i64>, <8 x i64>* %ptr_b
1988  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1989  ret <8 x i64> %res
1990}
1991
1992define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1993; X86-LABEL: test_mask_mullo_epi64_rmkz_512:
1994; X86:       # %bb.0:
1995; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1996; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1997; X86-NEXT:    vpmullq (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x00]
1998; X86-NEXT:    retl # encoding: [0xc3]
1999;
2000; X64-LABEL: test_mask_mullo_epi64_rmkz_512:
2001; X64:       # %bb.0:
2002; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2003; X64-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
2004; X64-NEXT:    retq # encoding: [0xc3]
2005  %b = load <8 x i64>, <8 x i64>* %ptr_b
2006  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2007  ret <8 x i64> %res
2008}
2009
2010define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
2011; X86-LABEL: test_mask_mullo_epi64_rmb_512:
2012; X86:       # %bb.0:
2013; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2014; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x00]
2015; X86-NEXT:    retl # encoding: [0xc3]
2016;
2017; X64-LABEL: test_mask_mullo_epi64_rmb_512:
2018; X64:       # %bb.0:
2019; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
2020; X64-NEXT:    retq # encoding: [0xc3]
2021  %q = load i64, i64* %ptr_b
2022  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2023  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2024  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2025  ret <8 x i64> %res
2026}
2027
2028define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2029; X86-LABEL: test_mask_mullo_epi64_rmbk_512:
2030; X86:       # %bb.0:
2031; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2032; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2033; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x08]
2034; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2035; X86-NEXT:    retl # encoding: [0xc3]
2036;
2037; X64-LABEL: test_mask_mullo_epi64_rmbk_512:
2038; X64:       # %bb.0:
2039; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2040; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
2041; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2042; X64-NEXT:    retq # encoding: [0xc3]
2043  %q = load i64, i64* %ptr_b
2044  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2045  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2046  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2047  ret <8 x i64> %res
2048}
2049
2050define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2051; X86-LABEL: test_mask_mullo_epi64_rmbkz_512:
2052; X86:       # %bb.0:
2053; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2054; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2055; X86-NEXT:    vpmullq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x00]
2056; X86-NEXT:    retl # encoding: [0xc3]
2057;
2058; X64-LABEL: test_mask_mullo_epi64_rmbkz_512:
2059; X64:       # %bb.0:
2060; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2061; X64-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
2062; X64-NEXT:    retq # encoding: [0xc3]
2063  %q = load i64, i64* %ptr_b
2064  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2065  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2066  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2067  ret <8 x i64> %res
2068}
2069declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2070
2071define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
2072; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
2073; CHECK:       # %bb.0:
2074; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
2075; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2076  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2077  ret <4 x i64> %res
2078}
2079
2080define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
2081; X86-LABEL: test_mask_mullo_epi64_rrk_256:
2082; X86:       # %bb.0:
2083; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2084; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
2085; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2086; X86-NEXT:    retl # encoding: [0xc3]
2087;
2088; X64-LABEL: test_mask_mullo_epi64_rrk_256:
2089; X64:       # %bb.0:
2090; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2091; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
2092; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2093; X64-NEXT:    retq # encoding: [0xc3]
2094  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2095  ret <4 x i64> %res
2096}
2097
2098define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
2099; X86-LABEL: test_mask_mullo_epi64_rrkz_256:
2100; X86:       # %bb.0:
2101; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2102; X86-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
2103; X86-NEXT:    retl # encoding: [0xc3]
2104;
2105; X64-LABEL: test_mask_mullo_epi64_rrkz_256:
2106; X64:       # %bb.0:
2107; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2108; X64-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
2109; X64-NEXT:    retq # encoding: [0xc3]
2110  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2111  ret <4 x i64> %res
2112}
2113
2114define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
2115; X86-LABEL: test_mask_mullo_epi64_rm_256:
2116; X86:       # %bb.0:
2117; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2118; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x00]
2119; X86-NEXT:    retl # encoding: [0xc3]
2120;
2121; X64-LABEL: test_mask_mullo_epi64_rm_256:
2122; X64:       # %bb.0:
2123; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
2124; X64-NEXT:    retq # encoding: [0xc3]
2125  %b = load <4 x i64>, <4 x i64>* %ptr_b
2126  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2127  ret <4 x i64> %res
2128}
2129
2130define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2131; X86-LABEL: test_mask_mullo_epi64_rmk_256:
2132; X86:       # %bb.0:
2133; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2134; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2135; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x08]
2136; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2137; X86-NEXT:    retl # encoding: [0xc3]
2138;
2139; X64-LABEL: test_mask_mullo_epi64_rmk_256:
2140; X64:       # %bb.0:
2141; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2142; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
2143; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2144; X64-NEXT:    retq # encoding: [0xc3]
2145  %b = load <4 x i64>, <4 x i64>* %ptr_b
2146  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2147  ret <4 x i64> %res
2148}
2149
2150define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
2151; X86-LABEL: test_mask_mullo_epi64_rmkz_256:
2152; X86:       # %bb.0:
2153; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2154; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2155; X86-NEXT:    vpmullq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x00]
2156; X86-NEXT:    retl # encoding: [0xc3]
2157;
2158; X64-LABEL: test_mask_mullo_epi64_rmkz_256:
2159; X64:       # %bb.0:
2160; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2161; X64-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
2162; X64-NEXT:    retq # encoding: [0xc3]
2163  %b = load <4 x i64>, <4 x i64>* %ptr_b
2164  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2165  ret <4 x i64> %res
2166}
2167
2168define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
2169; X86-LABEL: test_mask_mullo_epi64_rmb_256:
2170; X86:       # %bb.0:
2171; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2172; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x00]
2173; X86-NEXT:    retl # encoding: [0xc3]
2174;
2175; X64-LABEL: test_mask_mullo_epi64_rmb_256:
2176; X64:       # %bb.0:
2177; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
2178; X64-NEXT:    retq # encoding: [0xc3]
2179  %q = load i64, i64* %ptr_b
2180  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2181  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2182  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
2183  ret <4 x i64> %res
2184}
2185
2186define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
2187; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
2188; X86:       # %bb.0:
2189; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2190; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2191; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x08]
2192; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2193; X86-NEXT:    retl # encoding: [0xc3]
2194;
2195; X64-LABEL: test_mask_mullo_epi64_rmbk_256:
2196; X64:       # %bb.0:
2197; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2198; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
2199; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2200; X64-NEXT:    retq # encoding: [0xc3]
2201  %q = load i64, i64* %ptr_b
2202  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2203  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2204  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
2205  ret <4 x i64> %res
2206}
2207
2208define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
2209; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
2210; X86:       # %bb.0:
2211; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2212; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2213; X86-NEXT:    vpmullq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x00]
2214; X86-NEXT:    retl # encoding: [0xc3]
2215;
2216; X64-LABEL: test_mask_mullo_epi64_rmbkz_256:
2217; X64:       # %bb.0:
2218; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2219; X64-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
2220; X64-NEXT:    retq # encoding: [0xc3]
2221  %q = load i64, i64* %ptr_b
2222  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
2223  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
2224  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
2225  ret <4 x i64> %res
2226}
2227
2228declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2229
2230define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
2231; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
2232; CHECK:       # %bb.0:
2233; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
2234; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2235  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2236  ret <2 x i64> %res
2237}
2238
2239define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
2240; X86-LABEL: test_mask_mullo_epi64_rrk_128:
2241; X86:       # %bb.0:
2242; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2243; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
2244; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2245; X86-NEXT:    retl # encoding: [0xc3]
2246;
2247; X64-LABEL: test_mask_mullo_epi64_rrk_128:
2248; X64:       # %bb.0:
2249; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2250; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
2251; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2252; X64-NEXT:    retq # encoding: [0xc3]
2253  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2254  ret <2 x i64> %res
2255}
2256
2257define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
2258; X86-LABEL: test_mask_mullo_epi64_rrkz_128:
2259; X86:       # %bb.0:
2260; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2261; X86-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
2262; X86-NEXT:    retl # encoding: [0xc3]
2263;
2264; X64-LABEL: test_mask_mullo_epi64_rrkz_128:
2265; X64:       # %bb.0:
2266; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2267; X64-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
2268; X64-NEXT:    retq # encoding: [0xc3]
2269  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2270  ret <2 x i64> %res
2271}
2272
2273define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
2274; X86-LABEL: test_mask_mullo_epi64_rm_128:
2275; X86:       # %bb.0:
2276; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2277; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x00]
2278; X86-NEXT:    retl # encoding: [0xc3]
2279;
2280; X64-LABEL: test_mask_mullo_epi64_rm_128:
2281; X64:       # %bb.0:
2282; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
2283; X64-NEXT:    retq # encoding: [0xc3]
2284  %b = load <2 x i64>, <2 x i64>* %ptr_b
2285  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2286  ret <2 x i64> %res
2287}
2288
2289define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2290; X86-LABEL: test_mask_mullo_epi64_rmk_128:
2291; X86:       # %bb.0:
2292; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2293; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2294; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x08]
2295; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2296; X86-NEXT:    retl # encoding: [0xc3]
2297;
2298; X64-LABEL: test_mask_mullo_epi64_rmk_128:
2299; X64:       # %bb.0:
2300; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2301; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
2302; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2303; X64-NEXT:    retq # encoding: [0xc3]
2304  %b = load <2 x i64>, <2 x i64>* %ptr_b
2305  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2306  ret <2 x i64> %res
2307}
2308
2309define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
2310; X86-LABEL: test_mask_mullo_epi64_rmkz_128:
2311; X86:       # %bb.0:
2312; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2313; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2314; X86-NEXT:    vpmullq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x00]
2315; X86-NEXT:    retl # encoding: [0xc3]
2316;
2317; X64-LABEL: test_mask_mullo_epi64_rmkz_128:
2318; X64:       # %bb.0:
2319; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2320; X64-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
2321; X64-NEXT:    retq # encoding: [0xc3]
2322  %b = load <2 x i64>, <2 x i64>* %ptr_b
2323  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2324  ret <2 x i64> %res
2325}
2326
2327define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
2328; X86-LABEL: test_mask_mullo_epi64_rmb_128:
2329; X86:       # %bb.0:
2330; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2331; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x00]
2332; X86-NEXT:    retl # encoding: [0xc3]
2333;
2334; X64-LABEL: test_mask_mullo_epi64_rmb_128:
2335; X64:       # %bb.0:
2336; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
2337; X64-NEXT:    retq # encoding: [0xc3]
2338  %q = load i64, i64* %ptr_b
2339  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2340  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2341  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
2342  ret <2 x i64> %res
2343}
2344
2345define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
2346; X86-LABEL: test_mask_mullo_epi64_rmbk_128:
2347; X86:       # %bb.0:
2348; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2349; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2350; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x08]
2351; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2352; X86-NEXT:    retl # encoding: [0xc3]
2353;
2354; X64-LABEL: test_mask_mullo_epi64_rmbk_128:
2355; X64:       # %bb.0:
2356; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2357; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
2358; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2359; X64-NEXT:    retq # encoding: [0xc3]
2360  %q = load i64, i64* %ptr_b
2361  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2362  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2363  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
2364  ret <2 x i64> %res
2365}
2366
2367define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
2368; X86-LABEL: test_mask_mullo_epi64_rmbkz_128:
2369; X86:       # %bb.0:
2370; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2371; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2372; X86-NEXT:    vpmullq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x00]
2373; X86-NEXT:    retl # encoding: [0xc3]
2374;
2375; X64-LABEL: test_mask_mullo_epi64_rmbkz_128:
2376; X64:       # %bb.0:
2377; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2378; X64-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
2379; X64-NEXT:    retq # encoding: [0xc3]
2380  %q = load i64, i64* %ptr_b
2381  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
2382  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
2383  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
2384  ret <2 x i64> %res
2385}
2386
2387declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2388
2389declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
2390
2391define <2 x double>@test_int_x86_avx512_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2) {
2392; CHECK-LABEL: test_int_x86_avx512_vextractf64x2_256:
2393; CHECK:       # %bb.0:
2394; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
2395; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2396; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2397  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
2398  ret <2 x double> %res
2399}
2400
2401define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
2402; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
2403; X86:       # %bb.0:
2404; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2405; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
2406; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
2407; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2408; X86-NEXT:    retl # encoding: [0xc3]
2409;
2410; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
2411; X64:       # %bb.0:
2412; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2413; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
2414; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
2415; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2416; X64-NEXT:    retq # encoding: [0xc3]
2417  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
2418  ret <2 x double> %res
2419}
2420
2421define <2 x double>@test_int_x86_avx512_maskz_vextractf64x2_256(<4 x double> %x0, i8 %x3) {
2422; X86-LABEL: test_int_x86_avx512_maskz_vextractf64x2_256:
2423; X86:       # %bb.0:
2424; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2425; X86-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
2426; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2427; X86-NEXT:    retl # encoding: [0xc3]
2428;
2429; X64-LABEL: test_int_x86_avx512_maskz_vextractf64x2_256:
2430; X64:       # %bb.0:
2431; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2432; X64-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
2433; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2434; X64-NEXT:    retq # encoding: [0xc3]
2435  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
2436  ret <2 x double> %res
2437}
2438
2439declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
2440
2441define <4 x double>@test_int_x86_avx512_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3) {
2442; CHECK-LABEL: test_int_x86_avx512_insertf64x2_256:
2443; CHECK:       # %bb.0:
2444; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
2445; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2446  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
2447  ret <4 x double> %res
2448}
2449
2450define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
2451; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
2452; X86:       # %bb.0:
2453; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2454; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
2455; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
2456; X86-NEXT:    retl # encoding: [0xc3]
2457;
2458; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
2459; X64:       # %bb.0:
2460; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2461; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
2462; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
2463; X64-NEXT:    retq # encoding: [0xc3]
2464  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
2465  ret <4 x double> %res
2466}
2467
2468define <4 x double>@test_int_x86_avx512_maskz_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, i8 %x4) {
2469; X86-LABEL: test_int_x86_avx512_maskz_insertf64x2_256:
2470; X86:       # %bb.0:
2471; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2472; X86-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
2473; X86-NEXT:    retl # encoding: [0xc3]
2474;
2475; X64-LABEL: test_int_x86_avx512_maskz_insertf64x2_256:
2476; X64:       # %bb.0:
2477; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2478; X64-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
2479; X64-NEXT:    retq # encoding: [0xc3]
2480  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
2481  ret <4 x double> %res
2482}
2483
2484declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
2485
2486define <4 x i64>@test_int_x86_avx512_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3) {
2487; CHECK-LABEL: test_int_x86_avx512_inserti64x2_256:
2488; CHECK:       # %bb.0:
2489; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
2490; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2491  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
2492  ret <4 x i64> %res
2493}
2494
2495define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
2496; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
2497; X86:       # %bb.0:
2498; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2499; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
2500; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2501; X86-NEXT:    retl # encoding: [0xc3]
2502;
2503; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
2504; X64:       # %bb.0:
2505; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2506; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
2507; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2508; X64-NEXT:    retq # encoding: [0xc3]
2509  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
2510  ret <4 x i64> %res
2511}
2512
2513define <4 x i64>@test_int_x86_avx512_maskz_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x4) {
2514; X86-LABEL: test_int_x86_avx512_maskz_inserti64x2_256:
2515; X86:       # %bb.0:
2516; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2517; X86-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
2518; X86-NEXT:    retl # encoding: [0xc3]
2519;
2520; X64-LABEL: test_int_x86_avx512_maskz_inserti64x2_256:
2521; X64:       # %bb.0:
2522; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2523; X64-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
2524; X64-NEXT:    retq # encoding: [0xc3]
2525  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
2526  ret <4 x i64> %res
2527}
2528
2529declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
2530
2531define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
2532; X86-LABEL: test_int_x86_avx512_cvtmask2d_128:
2533; X86:       # %bb.0:
2534; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2535; X86-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
2536; X86-NEXT:    retl # encoding: [0xc3]
2537;
2538; X64-LABEL: test_int_x86_avx512_cvtmask2d_128:
2539; X64:       # %bb.0:
2540; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2541; X64-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
2542; X64-NEXT:    retq # encoding: [0xc3]
2543  %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
2544  ret <4 x i32> %res
2545}
2546
2547declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
2548
2549define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
2550; X86-LABEL: test_int_x86_avx512_cvtmask2d_256:
2551; X86:       # %bb.0:
2552; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2553; X86-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
2554; X86-NEXT:    retl # encoding: [0xc3]
2555;
2556; X64-LABEL: test_int_x86_avx512_cvtmask2d_256:
2557; X64:       # %bb.0:
2558; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2559; X64-NEXT:    vpmovm2d %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
2560; X64-NEXT:    retq # encoding: [0xc3]
2561  %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
2562  ret <8 x i32> %res
2563}
2564
2565declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
2566
2567define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
2568; X86-LABEL: test_int_x86_avx512_cvtmask2q_128:
2569; X86:       # %bb.0:
2570; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2571; X86-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
2572; X86-NEXT:    retl # encoding: [0xc3]
2573;
2574; X64-LABEL: test_int_x86_avx512_cvtmask2q_128:
2575; X64:       # %bb.0:
2576; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2577; X64-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
2578; X64-NEXT:    retq # encoding: [0xc3]
2579  %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
2580  ret <2 x i64> %res
2581}
2582
2583declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
2584
2585define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
2586; X86-LABEL: test_int_x86_avx512_cvtmask2q_256:
2587; X86:       # %bb.0:
2588; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
2589; X86-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
2590; X86-NEXT:    retl # encoding: [0xc3]
2591;
2592; X64-LABEL: test_int_x86_avx512_cvtmask2q_256:
2593; X64:       # %bb.0:
2594; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
2595; X64-NEXT:    vpmovm2q %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
2596; X64-NEXT:    retq # encoding: [0xc3]
2597  %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
2598  ret <4 x i64> %res
2599}
2600
2601declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
2602
2603define <4 x double>@test_int_x86_avx512_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2) {
2604; CHECK-LABEL: test_int_x86_avx512_broadcastf64x2_256:
2605; CHECK:       # %bb.0:
2606; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2607; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
2608; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2609
2610  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
2611  ret <4 x double> %res
2612}
2613
2614define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
2615; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
2616; X86:       # %bb.0:
2617; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2618; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2619; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
2620; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
2621; X86-NEXT:    retl # encoding: [0xc3]
2622;
2623; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
2624; X64:       # %bb.0:
2625; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2626; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2627; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x18,0xc8,0x01]
2628; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
2629; X64-NEXT:    retq # encoding: [0xc3]
2630
2631  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
2632  ret <4 x double> %res
2633}
2634
2635define <4 x double>@test_int_x86_avx512_maskz_broadcastf64x2_256(<2 x double> %x0, i8 %mask) {
2636; X86-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_256:
2637; X86:       # %bb.0:
2638; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2639; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2640; X86-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
2641; X86-NEXT:    retl # encoding: [0xc3]
2642;
2643; X64-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_256:
2644; X64:       # %bb.0:
2645; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2646; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2647; X64-NEXT:    vinsertf64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc0,0x01]
2648; X64-NEXT:    retq # encoding: [0xc3]
2649
2650  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
2651  ret <4 x double> %res
2652}
2653
2654define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
2655; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
2656; X86:       # %bb.0:
2657; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2658; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2659; X86-NEXT:    vbroadcastf64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x00]
2660; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2661; X86-NEXT:    retl # encoding: [0xc3]
2662;
2663; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
2664; X64:       # %bb.0:
2665; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2666; X64-NEXT:    vbroadcastf64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1a,0x07]
2667; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2668; X64-NEXT:    retq # encoding: [0xc3]
2669
2670  %x0 = load <2 x double>, <2 x double>* %x0ptr
2671  %res = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
2672  ret <4 x double> %res
2673}
2674
2675declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
2676
2677define <4 x i64>@test_int_x86_avx512_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2) {
2678; CHECK-LABEL: test_int_x86_avx512_broadcasti64x2_256:
2679; CHECK:       # %bb.0:
2680; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2681; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
2682; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2683
2684  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
2685  ret <4 x i64> %res
2686}
2687
2688define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
2689; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
2690; X86:       # %bb.0:
2691; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2692; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2693; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
2694; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2695; X86-NEXT:    retl # encoding: [0xc3]
2696;
2697; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
2698; X64:       # %bb.0:
2699; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2700; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2701; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x38,0xc8,0x01]
2702; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2703; X64-NEXT:    retq # encoding: [0xc3]
2704
2705  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
2706  ret <4 x i64> %res
2707}
2708
2709define <4 x i64>@test_int_x86_avx512_maskz_broadcasti64x2_256(<2 x i64> %x0, i8 %mask) {
2710; X86-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_256:
2711; X86:       # %bb.0:
2712; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2713; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2714; X86-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
2715; X86-NEXT:    retl # encoding: [0xc3]
2716;
2717; X64-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_256:
2718; X64:       # %bb.0:
2719; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
2720; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2721; X64-NEXT:    vinserti64x2 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc0,0x01]
2722; X64-NEXT:    retq # encoding: [0xc3]
2723
2724  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
2725  ret <4 x i64> %res
2726}
2727
2728define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
2729; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
2730; X86:       # %bb.0:
2731; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2732; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
2733; X86-NEXT:    vbroadcasti64x2 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x00]
2734; X86-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2735; X86-NEXT:    retl # encoding: [0xc3]
2736;
2737; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
2738; X64:       # %bb.0:
2739; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2740; X64-NEXT:    vbroadcasti64x2 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x5a,0x07]
2741; X64-NEXT:    # ymm0 {%k1} = mem[0,1,0,1]
2742; X64-NEXT:    retq # encoding: [0xc3]
2743
2744  %x0 = load <2 x i64>, <2 x i64>* %x0ptr
2745  %res = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
2746  ret <4 x i64> %res
2747}
2748
2749declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
2750
2751define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
2752; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
2753; CHECK:       # %bb.0:
2754; CHECK-NEXT:    vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
2755; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2756; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2757; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2758    %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
2759    ret i8 %res
2760}
2761
2762declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
2763
2764define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
2765; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
2766; CHECK:       # %bb.0:
2767; CHECK-NEXT:    vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
2768; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2769; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2770; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2771    %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
2772    ret i8 %res
2773}
2774
2775declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
2776
2777define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
2778; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
2779; CHECK:       # %bb.0:
2780; CHECK-NEXT:    vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
2781; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2782; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2783; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2784    %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
2785    ret i8 %res
2786}
2787
2788declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
2789
2790define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
2791; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
2792; CHECK:       # %bb.0:
2793; CHECK-NEXT:    vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
2794; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2795; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2796; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2797; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2798    %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
2799    ret i8 %res
2800}
2801
2802declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)
2803
2804define <2 x double>@test_int_x86_avx512_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1) {
2805; CHECK-LABEL: test_int_x86_avx512_cvt_qq2pd_128:
2806; CHECK:       # %bb.0:
2807; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0]
2808; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2809  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
2810  ret <2 x double> %res
2811}
2812
2813define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
2814; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
2815; X86:       # %bb.0:
2816; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2817; X86-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
2818; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
2819; X86-NEXT:    retl # encoding: [0xc3]
2820;
2821; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
2822; X64:       # %bb.0:
2823; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2824; X64-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
2825; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
2826; X64-NEXT:    retq # encoding: [0xc3]
2827  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
2828  ret <2 x double> %res
2829}
2830
2831declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)
2832
2833define <4 x double>@test_int_x86_avx512_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1) {
2834; CHECK-LABEL: test_int_x86_avx512_cvt_qq2pd_256:
2835; CHECK:       # %bb.0:
2836; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0]
2837; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2838  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
2839  ret <4 x double> %res
2840}
2841
2842define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
2843; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
2844; X86:       # %bb.0:
2845; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2846; X86-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
2847; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
2848; X86-NEXT:    retl # encoding: [0xc3]
2849;
2850; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
2851; X64:       # %bb.0:
2852; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2853; X64-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
2854; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
2855; X64-NEXT:    retq # encoding: [0xc3]
2856  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
2857  ret <4 x double> %res
2858}
2859
2860declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)
2861
2862define <2 x double>@test_int_x86_avx512_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1) {
2863; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2pd_128:
2864; CHECK:       # %bb.0:
2865; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
2866; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2867  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
2868  ret <2 x double> %res
2869}
2870
2871define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
2872; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
2873; X86:       # %bb.0:
2874; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2875; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
2876; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
2877; X86-NEXT:    retl # encoding: [0xc3]
2878;
2879; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
2880; X64:       # %bb.0:
2881; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2882; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
2883; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
2884; X64-NEXT:    retq # encoding: [0xc3]
2885  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
2886  ret <2 x double> %res
2887}
2888
2889declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)
2890
2891define <4 x double>@test_int_x86_avx512_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1) {
2892; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2pd_256:
2893; CHECK:       # %bb.0:
2894; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
2895; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2896  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
2897  ret <4 x double> %res
2898}
2899
2900define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
2901; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
2902; X86:       # %bb.0:
2903; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2904; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
2905; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
2906; X86-NEXT:    retl # encoding: [0xc3]
2907;
2908; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
2909; X64:       # %bb.0:
2910; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2911; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
2912; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
2913; X64-NEXT:    retq # encoding: [0xc3]
2914  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
2915  ret <4 x double> %res
2916}
2917
2918declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
2919
2920define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
2921; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
2922; CHECK:       # %bb.0:
2923; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
2924; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
2925; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2926; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2927; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2928  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
2929  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
2930  ret i8 %res1
2931}
2932
2933declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
2934
2935define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
2936; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
2937; CHECK:       # %bb.0:
2938; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
2939; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
2940; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2941; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2942; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2943; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2944  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
2945  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
2946  ret i8 %res1
2947}
2948
2949declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
2950
2951define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
2952; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
2953; CHECK:       # %bb.0:
2954; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
2955; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
2956; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2957; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2958; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2959  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
2960  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
2961  ret i8 %res1
2962}
2963
2964declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
2965
2966define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
2967; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
2968; CHECK:       # %bb.0:
2969; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
2970; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
2971; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2972; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2973; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2974; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2975  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
2976  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
2977  ret i8 %res1
2978}
2979
2980declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
2981
2982define <4 x float> @test_int_x86_avx512_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
2983; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_256:
2984; CHECK:       # %bb.0:
2985; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
2986; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2987; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2988  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
2989  ret <4 x float> %res
2990}
2991
2992define <4 x float> @test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
2993; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
2994; X86:       # %bb.0:
2995; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
2996; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
2997; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
2998; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2999; X86-NEXT:    retl # encoding: [0xc3]
3000;
3001; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
3002; X64:       # %bb.0:
3003; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3004; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
3005; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3006; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3007; X64-NEXT:    retq # encoding: [0xc3]
3008  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
3009  ret <4 x float> %res
3010}
3011
3012declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
3013
3014define <4 x float> @test_int_x86_avx512_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
3015; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_256:
3016; CHECK:       # %bb.0:
3017; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
3018; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3019; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3020  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
3021  ret <4 x float> %res
3022}
3023
3024define <4 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
3025; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3026; X86:       # %bb.0:
3027; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
3028; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3029; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3030; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3031; X86-NEXT:    retl # encoding: [0xc3]
3032;
3033; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
3034; X64:       # %bb.0:
3035; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3036; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
3037; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3038; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
3039; X64-NEXT:    retq # encoding: [0xc3]
3040  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
3041  ret <4 x float> %res
3042}
3043