1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
3
4define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
5; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
6; CHECK:       ## BB#0:
7; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
8; CHECK-NEXT:    retq ## encoding: [0xc3]
9  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
10  ret <8 x i64> %res
11}
12
13define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
14; CHECK-LABEL: test_mask_mullo_epi64_rrk_512:
15; CHECK:       ## BB#0:
16; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
17; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
18; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
19; CHECK-NEXT:    retq ## encoding: [0xc3]
20  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
21  ret <8 x i64> %res
22}
23
24define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
25; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512:
26; CHECK:       ## BB#0:
27; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
28; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
29; CHECK-NEXT:    retq ## encoding: [0xc3]
30  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
31  ret <8 x i64> %res
32}
33
34define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
35; CHECK-LABEL: test_mask_mullo_epi64_rm_512:
36; CHECK:       ## BB#0:
37; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
38; CHECK-NEXT:    retq ## encoding: [0xc3]
39  %b = load <8 x i64>, <8 x i64>* %ptr_b
40  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
41  ret <8 x i64> %res
42}
43
44define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
45; CHECK-LABEL: test_mask_mullo_epi64_rmk_512:
46; CHECK:       ## BB#0:
47; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
48; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
49; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
50; CHECK-NEXT:    retq ## encoding: [0xc3]
51  %b = load <8 x i64>, <8 x i64>* %ptr_b
52  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
53  ret <8 x i64> %res
54}
55
56define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
57; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512:
58; CHECK:       ## BB#0:
59; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
60; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
61; CHECK-NEXT:    retq ## encoding: [0xc3]
62  %b = load <8 x i64>, <8 x i64>* %ptr_b
63  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
64  ret <8 x i64> %res
65}
66
67define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
68; CHECK-LABEL: test_mask_mullo_epi64_rmb_512:
69; CHECK:       ## BB#0:
70; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
71; CHECK-NEXT:    retq ## encoding: [0xc3]
72  %q = load i64, i64* %ptr_b
73  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
74  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
75  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
76  ret <8 x i64> %res
77}
78
79define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
80; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512:
81; CHECK:       ## BB#0:
82; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
83; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
84; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
85; CHECK-NEXT:    retq ## encoding: [0xc3]
86  %q = load i64, i64* %ptr_b
87  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
88  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
89  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
90  ret <8 x i64> %res
91}
92
93define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
94; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512:
95; CHECK:       ## BB#0:
96; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
97; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
98; CHECK-NEXT:    retq ## encoding: [0xc3]
99  %q = load i64, i64* %ptr_b
100  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
101  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
102  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
103  ret <8 x i64> %res
104}
105declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
106
107define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
108; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
109; CHECK:       ## BB#0:
110; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
111; CHECK-NEXT:    retq ## encoding: [0xc3]
112  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
113  ret <4 x i64> %res
114}
115
116define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
117; CHECK-LABEL: test_mask_mullo_epi64_rrk_256:
118; CHECK:       ## BB#0:
119; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
120; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
121; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
122; CHECK-NEXT:    retq ## encoding: [0xc3]
123  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
124  ret <4 x i64> %res
125}
126
127define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
128; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256:
129; CHECK:       ## BB#0:
130; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
131; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
132; CHECK-NEXT:    retq ## encoding: [0xc3]
133  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
134  ret <4 x i64> %res
135}
136
137define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
138; CHECK-LABEL: test_mask_mullo_epi64_rm_256:
139; CHECK:       ## BB#0:
140; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
141; CHECK-NEXT:    retq ## encoding: [0xc3]
142  %b = load <4 x i64>, <4 x i64>* %ptr_b
143  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
144  ret <4 x i64> %res
145}
146
147define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
148; CHECK-LABEL: test_mask_mullo_epi64_rmk_256:
149; CHECK:       ## BB#0:
150; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
151; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
152; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
153; CHECK-NEXT:    retq ## encoding: [0xc3]
154  %b = load <4 x i64>, <4 x i64>* %ptr_b
155  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
156  ret <4 x i64> %res
157}
158
159define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
160; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256:
161; CHECK:       ## BB#0:
162; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
163; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
164; CHECK-NEXT:    retq ## encoding: [0xc3]
165  %b = load <4 x i64>, <4 x i64>* %ptr_b
166  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
167  ret <4 x i64> %res
168}
169
170define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
171; CHECK-LABEL: test_mask_mullo_epi64_rmb_256:
172; CHECK:       ## BB#0:
173; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
174; CHECK-NEXT:    retq ## encoding: [0xc3]
175  %q = load i64, i64* %ptr_b
176  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
177  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
178  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
179  ret <4 x i64> %res
180}
181
182define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
183; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256:
184; CHECK:       ## BB#0:
185; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
186; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
187; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
188; CHECK-NEXT:    retq ## encoding: [0xc3]
189  %q = load i64, i64* %ptr_b
190  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
191  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
192  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
193  ret <4 x i64> %res
194}
195
196define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
197; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256:
198; CHECK:       ## BB#0:
199; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
200; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
201; CHECK-NEXT:    retq ## encoding: [0xc3]
202  %q = load i64, i64* %ptr_b
203  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
204  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
205  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
206  ret <4 x i64> %res
207}
208
209declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
210
211define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
212; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
213; CHECK:       ## BB#0:
214; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
215; CHECK-NEXT:    retq ## encoding: [0xc3]
216  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
217  ret <2 x i64> %res
218}
219
220define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
221; CHECK-LABEL: test_mask_mullo_epi64_rrk_128:
222; CHECK:       ## BB#0:
223; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
224; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
225; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
226; CHECK-NEXT:    retq ## encoding: [0xc3]
227  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
228  ret <2 x i64> %res
229}
230
231define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
232; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128:
233; CHECK:       ## BB#0:
234; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
235; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
236; CHECK-NEXT:    retq ## encoding: [0xc3]
237  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
238  ret <2 x i64> %res
239}
240
241define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
242; CHECK-LABEL: test_mask_mullo_epi64_rm_128:
243; CHECK:       ## BB#0:
244; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
245; CHECK-NEXT:    retq ## encoding: [0xc3]
246  %b = load <2 x i64>, <2 x i64>* %ptr_b
247  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
248  ret <2 x i64> %res
249}
250
251define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
252; CHECK-LABEL: test_mask_mullo_epi64_rmk_128:
253; CHECK:       ## BB#0:
254; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
255; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
256; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
257; CHECK-NEXT:    retq ## encoding: [0xc3]
258  %b = load <2 x i64>, <2 x i64>* %ptr_b
259  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
260  ret <2 x i64> %res
261}
262
263define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
264; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128:
265; CHECK:       ## BB#0:
266; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
267; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
268; CHECK-NEXT:    retq ## encoding: [0xc3]
269  %b = load <2 x i64>, <2 x i64>* %ptr_b
270  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
271  ret <2 x i64> %res
272}
273
274define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
275; CHECK-LABEL: test_mask_mullo_epi64_rmb_128:
276; CHECK:       ## BB#0:
277; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
278; CHECK-NEXT:    retq ## encoding: [0xc3]
279  %q = load i64, i64* %ptr_b
280  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
281  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
282  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
283  ret <2 x i64> %res
284}
285
286define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
287; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128:
288; CHECK:       ## BB#0:
289; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
290; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
291; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
292; CHECK-NEXT:    retq ## encoding: [0xc3]
293  %q = load i64, i64* %ptr_b
294  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
295  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
296  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
297  ret <2 x i64> %res
298}
299
300define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
301; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128:
302; CHECK:       ## BB#0:
303; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
304; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
305; CHECK-NEXT:    retq ## encoding: [0xc3]
306  %q = load i64, i64* %ptr_b
307  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
308  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
309  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
310  ret <2 x i64> %res
311}
312
313declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
314
315define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
316; CHECK-LABEL: test_mask_andnot_ps_rr_128:
317; CHECK:       ## BB#0:
318; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
319; CHECK-NEXT:    retq ## encoding: [0xc3]
320  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
321  ret <4 x float> %res
322}
323
324define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
325; CHECK-LABEL: test_mask_andnot_ps_rrk_128:
326; CHECK:       ## BB#0:
327; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
328; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
329; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
330; CHECK-NEXT:    retq ## encoding: [0xc3]
331  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
332  ret <4 x float> %res
333}
334
335define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
336; CHECK-LABEL: test_mask_andnot_ps_rrkz_128:
337; CHECK:       ## BB#0:
338; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
339; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
340; CHECK-NEXT:    retq ## encoding: [0xc3]
341  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
342  ret <4 x float> %res
343}
344
345define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
346; CHECK-LABEL: test_mask_andnot_ps_rm_128:
347; CHECK:       ## BB#0:
348; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
349; CHECK-NEXT:    retq ## encoding: [0xc3]
350  %b = load <4 x float>, <4 x float>* %ptr_b
351  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
352  ret <4 x float> %res
353}
354
355define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
356; CHECK-LABEL: test_mask_andnot_ps_rmk_128:
357; CHECK:       ## BB#0:
358; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
359; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
360; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
361; CHECK-NEXT:    retq ## encoding: [0xc3]
362  %b = load <4 x float>, <4 x float>* %ptr_b
363  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
364  ret <4 x float> %res
365}
366
367define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
368; CHECK-LABEL: test_mask_andnot_ps_rmkz_128:
369; CHECK:       ## BB#0:
370; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
371; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
372; CHECK-NEXT:    retq ## encoding: [0xc3]
373  %b = load <4 x float>, <4 x float>* %ptr_b
374  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
375  ret <4 x float> %res
376}
377
378define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
379; CHECK-LABEL: test_mask_andnot_ps_rmb_128:
380; CHECK:       ## BB#0:
381; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
382; CHECK-NEXT:    retq ## encoding: [0xc3]
383  %q = load float, float* %ptr_b
384  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
385  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
386  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
387  ret <4 x float> %res
388}
389
390define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
391; CHECK-LABEL: test_mask_andnot_ps_rmbk_128:
392; CHECK:       ## BB#0:
393; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
394; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
395; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
396; CHECK-NEXT:    retq ## encoding: [0xc3]
397  %q = load float, float* %ptr_b
398  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
399  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
400  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
401  ret <4 x float> %res
402}
403
404define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
405; CHECK-LABEL: test_mask_andnot_ps_rmbkz_128:
406; CHECK:       ## BB#0:
407; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
408; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
409; CHECK-NEXT:    retq ## encoding: [0xc3]
410  %q = load float, float* %ptr_b
411  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
412  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
413  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
414  ret <4 x float> %res
415}
416
417declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
418
419define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
420; CHECK-LABEL: test_mask_andnot_ps_rr_256:
421; CHECK:       ## BB#0:
422; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
423; CHECK-NEXT:    retq ## encoding: [0xc3]
424  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
425  ret <8 x float> %res
426}
427
428define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
429; CHECK-LABEL: test_mask_andnot_ps_rrk_256:
430; CHECK:       ## BB#0:
431; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
432; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
433; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
434; CHECK-NEXT:    retq ## encoding: [0xc3]
435  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
436  ret <8 x float> %res
437}
438
439define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
440; CHECK-LABEL: test_mask_andnot_ps_rrkz_256:
441; CHECK:       ## BB#0:
442; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
443; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
444; CHECK-NEXT:    retq ## encoding: [0xc3]
445  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
446  ret <8 x float> %res
447}
448
449define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
450; CHECK-LABEL: test_mask_andnot_ps_rm_256:
451; CHECK:       ## BB#0:
452; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
453; CHECK-NEXT:    retq ## encoding: [0xc3]
454  %b = load <8 x float>, <8 x float>* %ptr_b
455  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
456  ret <8 x float> %res
457}
458
459define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
460; CHECK-LABEL: test_mask_andnot_ps_rmk_256:
461; CHECK:       ## BB#0:
462; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
463; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
464; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
465; CHECK-NEXT:    retq ## encoding: [0xc3]
466  %b = load <8 x float>, <8 x float>* %ptr_b
467  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
468  ret <8 x float> %res
469}
470
471define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
472; CHECK-LABEL: test_mask_andnot_ps_rmkz_256:
473; CHECK:       ## BB#0:
474; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
475; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
476; CHECK-NEXT:    retq ## encoding: [0xc3]
477  %b = load <8 x float>, <8 x float>* %ptr_b
478  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
479  ret <8 x float> %res
480}
481
482define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
483; CHECK-LABEL: test_mask_andnot_ps_rmb_256:
484; CHECK:       ## BB#0:
485; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
486; CHECK-NEXT:    retq ## encoding: [0xc3]
487  %q = load float, float* %ptr_b
488  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
489  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
490  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
491  ret <8 x float> %res
492}
493
494define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
495; CHECK-LABEL: test_mask_andnot_ps_rmbk_256:
496; CHECK:       ## BB#0:
497; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
498; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
499; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
500; CHECK-NEXT:    retq ## encoding: [0xc3]
501  %q = load float, float* %ptr_b
502  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
503  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
504  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
505  ret <8 x float> %res
506}
507
508define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
509; CHECK-LABEL: test_mask_andnot_ps_rmbkz_256:
510; CHECK:       ## BB#0:
511; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
512; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
513; CHECK-NEXT:    retq ## encoding: [0xc3]
514  %q = load float, float* %ptr_b
515  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
516  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
517  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
518  ret <8 x float> %res
519}
520
521declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
522
523define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
524; CHECK-LABEL: test_mask_andnot_ps_rr_512:
525; CHECK:       ## BB#0:
526; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
527; CHECK-NEXT:    retq ## encoding: [0xc3]
528  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
529  ret <16 x float> %res
530}
531
532define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
533; CHECK-LABEL: test_mask_andnot_ps_rrk_512:
534; CHECK:       ## BB#0:
535; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
536; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
537; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
538; CHECK-NEXT:    retq ## encoding: [0xc3]
539  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
540  ret <16 x float> %res
541}
542
543define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
544; CHECK-LABEL: test_mask_andnot_ps_rrkz_512:
545; CHECK:       ## BB#0:
546; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
547; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
548; CHECK-NEXT:    retq ## encoding: [0xc3]
549  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
550  ret <16 x float> %res
551}
552
553define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
554; CHECK-LABEL: test_mask_andnot_ps_rm_512:
555; CHECK:       ## BB#0:
556; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
557; CHECK-NEXT:    retq ## encoding: [0xc3]
558  %b = load <16 x float>, <16 x float>* %ptr_b
559  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
560  ret <16 x float> %res
561}
562
563define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
564; CHECK-LABEL: test_mask_andnot_ps_rmk_512:
565; CHECK:       ## BB#0:
566; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
567; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
568; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
569; CHECK-NEXT:    retq ## encoding: [0xc3]
570  %b = load <16 x float>, <16 x float>* %ptr_b
571  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
572  ret <16 x float> %res
573}
574
575define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
576; CHECK-LABEL: test_mask_andnot_ps_rmkz_512:
577; CHECK:       ## BB#0:
578; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
579; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
580; CHECK-NEXT:    retq ## encoding: [0xc3]
581  %b = load <16 x float>, <16 x float>* %ptr_b
582  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
583  ret <16 x float> %res
584}
585
586define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
587; CHECK-LABEL: test_mask_andnot_ps_rmb_512:
588; CHECK:       ## BB#0:
589; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
590; CHECK-NEXT:    retq ## encoding: [0xc3]
591  %q = load float, float* %ptr_b
592  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
593  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
594  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
595  ret <16 x float> %res
596}
597
598define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
599; CHECK-LABEL: test_mask_andnot_ps_rmbk_512:
600; CHECK:       ## BB#0:
601; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
602; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
603; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
604; CHECK-NEXT:    retq ## encoding: [0xc3]
605  %q = load float, float* %ptr_b
606  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
607  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
608  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
609  ret <16 x float> %res
610}
611
612define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
613; CHECK-LABEL: test_mask_andnot_ps_rmbkz_512:
614; CHECK:       ## BB#0:
615; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
616; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
617; CHECK-NEXT:    retq ## encoding: [0xc3]
618  %q = load float, float* %ptr_b
619  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
620  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
621  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
622  ret <16 x float> %res
623}
624
625declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
626
627define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
628; CHECK-LABEL: test_mask_and_ps_rr_128:
629; CHECK:       ## BB#0:
630; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1]
631; CHECK-NEXT:    retq ## encoding: [0xc3]
632  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
633  ret <4 x float> %res
634}
635
636define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
637; CHECK-LABEL: test_mask_and_ps_rrk_128:
638; CHECK:       ## BB#0:
639; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
640; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
641; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
642; CHECK-NEXT:    retq ## encoding: [0xc3]
643  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
644  ret <4 x float> %res
645}
646
647define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
648; CHECK-LABEL: test_mask_and_ps_rrkz_128:
649; CHECK:       ## BB#0:
650; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
651; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
652; CHECK-NEXT:    retq ## encoding: [0xc3]
653  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
654  ret <4 x float> %res
655}
656
657define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
658; CHECK-LABEL: test_mask_and_ps_rm_128:
659; CHECK:       ## BB#0:
660; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0x07]
661; CHECK-NEXT:    retq ## encoding: [0xc3]
662  %b = load <4 x float>, <4 x float>* %ptr_b
663  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
664  ret <4 x float> %res
665}
666
667define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
668; CHECK-LABEL: test_mask_and_ps_rmk_128:
669; CHECK:       ## BB#0:
670; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
671; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
672; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
673; CHECK-NEXT:    retq ## encoding: [0xc3]
674  %b = load <4 x float>, <4 x float>* %ptr_b
675  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
676  ret <4 x float> %res
677}
678
679define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
680; CHECK-LABEL: test_mask_and_ps_rmkz_128:
681; CHECK:       ## BB#0:
682; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
683; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
684; CHECK-NEXT:    retq ## encoding: [0xc3]
685  %b = load <4 x float>, <4 x float>* %ptr_b
686  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
687  ret <4 x float> %res
688}
689
690define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
691; CHECK-LABEL: test_mask_and_ps_rmb_128:
692; CHECK:       ## BB#0:
693; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
694; CHECK-NEXT:    retq ## encoding: [0xc3]
695  %q = load float, float* %ptr_b
696  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
697  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
698  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
699  ret <4 x float> %res
700}
701
702define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
703; CHECK-LABEL: test_mask_and_ps_rmbk_128:
704; CHECK:       ## BB#0:
705; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
706; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
707; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
708; CHECK-NEXT:    retq ## encoding: [0xc3]
709  %q = load float, float* %ptr_b
710  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
711  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
712  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
713  ret <4 x float> %res
714}
715
716define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
717; CHECK-LABEL: test_mask_and_ps_rmbkz_128:
718; CHECK:       ## BB#0:
719; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
720; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
721; CHECK-NEXT:    retq ## encoding: [0xc3]
722  %q = load float, float* %ptr_b
723  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
724  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
725  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
726  ret <4 x float> %res
727}
728
729declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
730
731define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
732; CHECK-LABEL: test_mask_and_ps_rr_256:
733; CHECK:       ## BB#0:
734; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0xc1]
735; CHECK-NEXT:    retq ## encoding: [0xc3]
736  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
737  ret <8 x float> %res
738}
739
740define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
741; CHECK-LABEL: test_mask_and_ps_rrk_256:
742; CHECK:       ## BB#0:
743; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
744; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
745; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
746; CHECK-NEXT:    retq ## encoding: [0xc3]
747  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
748  ret <8 x float> %res
749}
750
751define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
752; CHECK-LABEL: test_mask_and_ps_rrkz_256:
753; CHECK:       ## BB#0:
754; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
755; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
756; CHECK-NEXT:    retq ## encoding: [0xc3]
757  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
758  ret <8 x float> %res
759}
760
761define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
762; CHECK-LABEL: test_mask_and_ps_rm_256:
763; CHECK:       ## BB#0:
764; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0x07]
765; CHECK-NEXT:    retq ## encoding: [0xc3]
766  %b = load <8 x float>, <8 x float>* %ptr_b
767  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
768  ret <8 x float> %res
769}
770
771define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
772; CHECK-LABEL: test_mask_and_ps_rmk_256:
773; CHECK:       ## BB#0:
774; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
775; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
776; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
777; CHECK-NEXT:    retq ## encoding: [0xc3]
778  %b = load <8 x float>, <8 x float>* %ptr_b
779  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
780  ret <8 x float> %res
781}
782
783define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
784; CHECK-LABEL: test_mask_and_ps_rmkz_256:
785; CHECK:       ## BB#0:
786; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
787; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
788; CHECK-NEXT:    retq ## encoding: [0xc3]
789  %b = load <8 x float>, <8 x float>* %ptr_b
790  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
791  ret <8 x float> %res
792}
793
794define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
795; CHECK-LABEL: test_mask_and_ps_rmb_256:
796; CHECK:       ## BB#0:
797; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
798; CHECK-NEXT:    retq ## encoding: [0xc3]
799  %q = load float, float* %ptr_b
800  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
801  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
802  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
803  ret <8 x float> %res
804}
805
806define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
807; CHECK-LABEL: test_mask_and_ps_rmbk_256:
808; CHECK:       ## BB#0:
809; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
810; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
811; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
812; CHECK-NEXT:    retq ## encoding: [0xc3]
813  %q = load float, float* %ptr_b
814  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
815  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
816  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
817  ret <8 x float> %res
818}
819
820define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
821; CHECK-LABEL: test_mask_and_ps_rmbkz_256:
822; CHECK:       ## BB#0:
823; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
824; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
825; CHECK-NEXT:    retq ## encoding: [0xc3]
826  %q = load float, float* %ptr_b
827  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
828  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
829  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
830  ret <8 x float> %res
831}
832
833declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
834
835define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
836; CHECK-LABEL: test_mask_and_ps_rr_512:
837; CHECK:       ## BB#0:
838; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
839; CHECK-NEXT:    retq ## encoding: [0xc3]
840  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
841  ret <16 x float> %res
842}
843
844define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
845; CHECK-LABEL: test_mask_and_ps_rrk_512:
846; CHECK:       ## BB#0:
847; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
848; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
849; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
850; CHECK-NEXT:    retq ## encoding: [0xc3]
851  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
852  ret <16 x float> %res
853}
854
855define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
856; CHECK-LABEL: test_mask_and_ps_rrkz_512:
857; CHECK:       ## BB#0:
858; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
859; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
860; CHECK-NEXT:    retq ## encoding: [0xc3]
861  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
862  ret <16 x float> %res
863}
864
865define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
866; CHECK-LABEL: test_mask_and_ps_rm_512:
867; CHECK:       ## BB#0:
868; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
869; CHECK-NEXT:    retq ## encoding: [0xc3]
870  %b = load <16 x float>, <16 x float>* %ptr_b
871  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
872  ret <16 x float> %res
873}
874
875define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
876; CHECK-LABEL: test_mask_and_ps_rmk_512:
877; CHECK:       ## BB#0:
878; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
879; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
880; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
881; CHECK-NEXT:    retq ## encoding: [0xc3]
882  %b = load <16 x float>, <16 x float>* %ptr_b
883  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
884  ret <16 x float> %res
885}
886
887define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
888; CHECK-LABEL: test_mask_and_ps_rmkz_512:
889; CHECK:       ## BB#0:
890; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
891; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
892; CHECK-NEXT:    retq ## encoding: [0xc3]
893  %b = load <16 x float>, <16 x float>* %ptr_b
894  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
895  ret <16 x float> %res
896}
897
898define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
899; CHECK-LABEL: test_mask_and_ps_rmb_512:
900; CHECK:       ## BB#0:
901; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
902; CHECK-NEXT:    retq ## encoding: [0xc3]
903  %q = load float, float* %ptr_b
904  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
905  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
906  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
907  ret <16 x float> %res
908}
909
910define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
911; CHECK-LABEL: test_mask_and_ps_rmbk_512:
912; CHECK:       ## BB#0:
913; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
914; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
915; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
916; CHECK-NEXT:    retq ## encoding: [0xc3]
917  %q = load float, float* %ptr_b
918  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
919  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
920  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
921  ret <16 x float> %res
922}
923
924define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
925; CHECK-LABEL: test_mask_and_ps_rmbkz_512:
926; CHECK:       ## BB#0:
927; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
928; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
929; CHECK-NEXT:    retq ## encoding: [0xc3]
930  %q = load float, float* %ptr_b
931  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
932  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
933  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
934  ret <16 x float> %res
935}
936
937declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
938
939define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
940; CHECK-LABEL: test_mask_or_ps_rr_128:
941; CHECK:       ## BB#0:
942; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1]
943; CHECK-NEXT:    retq ## encoding: [0xc3]
944  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
945  ret <4 x float> %res
946}
947
948define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
949; CHECK-LABEL: test_mask_or_ps_rrk_128:
950; CHECK:       ## BB#0:
951; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
952; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
953; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
954; CHECK-NEXT:    retq ## encoding: [0xc3]
955  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
956  ret <4 x float> %res
957}
958
959define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
960; CHECK-LABEL: test_mask_or_ps_rrkz_128:
961; CHECK:       ## BB#0:
962; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
963; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
964; CHECK-NEXT:    retq ## encoding: [0xc3]
965  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
966  ret <4 x float> %res
967}
968
969define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
970; CHECK-LABEL: test_mask_or_ps_rm_128:
971; CHECK:       ## BB#0:
972; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0x07]
973; CHECK-NEXT:    retq ## encoding: [0xc3]
974  %b = load <4 x float>, <4 x float>* %ptr_b
975  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
976  ret <4 x float> %res
977}
978
979define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
980; CHECK-LABEL: test_mask_or_ps_rmk_128:
981; CHECK:       ## BB#0:
982; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
983; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
984; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
985; CHECK-NEXT:    retq ## encoding: [0xc3]
986  %b = load <4 x float>, <4 x float>* %ptr_b
987  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
988  ret <4 x float> %res
989}
990
991define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
992; CHECK-LABEL: test_mask_or_ps_rmkz_128:
993; CHECK:       ## BB#0:
994; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
995; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
996; CHECK-NEXT:    retq ## encoding: [0xc3]
997  %b = load <4 x float>, <4 x float>* %ptr_b
998  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
999  ret <4 x float> %res
1000}
1001
1002define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
1003; CHECK-LABEL: test_mask_or_ps_rmb_128:
1004; CHECK:       ## BB#0:
1005; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
1006; CHECK-NEXT:    retq ## encoding: [0xc3]
1007  %q = load float, float* %ptr_b
1008  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1009  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1010  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1011  ret <4 x float> %res
1012}
1013
1014define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
1015; CHECK-LABEL: test_mask_or_ps_rmbk_128:
1016; CHECK:       ## BB#0:
1017; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1018; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
1019; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1020; CHECK-NEXT:    retq ## encoding: [0xc3]
1021  %q = load float, float* %ptr_b
1022  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1023  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1024  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1025  ret <4 x float> %res
1026}
1027
1028define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
1029; CHECK-LABEL: test_mask_or_ps_rmbkz_128:
1030; CHECK:       ## BB#0:
1031; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1032; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
1033; CHECK-NEXT:    retq ## encoding: [0xc3]
1034  %q = load float, float* %ptr_b
1035  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1036  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1037  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1038  ret <4 x float> %res
1039}
1040
1041declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1042
1043define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1044; CHECK-LABEL: test_mask_or_ps_rr_256:
1045; CHECK:       ## BB#0:
1046; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0xc1]
1047; CHECK-NEXT:    retq ## encoding: [0xc3]
1048  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1049  ret <8 x float> %res
1050}
1051
1052define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1053; CHECK-LABEL: test_mask_or_ps_rrk_256:
1054; CHECK:       ## BB#0:
1055; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1056; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
1057; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1058; CHECK-NEXT:    retq ## encoding: [0xc3]
1059  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1060  ret <8 x float> %res
1061}
1062
1063define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1064; CHECK-LABEL: test_mask_or_ps_rrkz_256:
1065; CHECK:       ## BB#0:
1066; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1067; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
1068; CHECK-NEXT:    retq ## encoding: [0xc3]
1069  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1070  ret <8 x float> %res
1071}
1072
1073define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1074; CHECK-LABEL: test_mask_or_ps_rm_256:
1075; CHECK:       ## BB#0:
1076; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0x07]
1077; CHECK-NEXT:    retq ## encoding: [0xc3]
1078  %b = load <8 x float>, <8 x float>* %ptr_b
1079  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1080  ret <8 x float> %res
1081}
1082
1083define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1084; CHECK-LABEL: test_mask_or_ps_rmk_256:
1085; CHECK:       ## BB#0:
1086; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1087; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
1088; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1089; CHECK-NEXT:    retq ## encoding: [0xc3]
1090  %b = load <8 x float>, <8 x float>* %ptr_b
1091  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1092  ret <8 x float> %res
1093}
1094
1095define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1096; CHECK-LABEL: test_mask_or_ps_rmkz_256:
1097; CHECK:       ## BB#0:
1098; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1099; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
1100; CHECK-NEXT:    retq ## encoding: [0xc3]
1101  %b = load <8 x float>, <8 x float>* %ptr_b
1102  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1103  ret <8 x float> %res
1104}
1105
1106define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1107; CHECK-LABEL: test_mask_or_ps_rmb_256:
1108; CHECK:       ## BB#0:
1109; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
1110; CHECK-NEXT:    retq ## encoding: [0xc3]
1111  %q = load float, float* %ptr_b
1112  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1113  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1114  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1115  ret <8 x float> %res
1116}
1117
1118define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1119; CHECK-LABEL: test_mask_or_ps_rmbk_256:
1120; CHECK:       ## BB#0:
1121; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1122; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
1123; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1124; CHECK-NEXT:    retq ## encoding: [0xc3]
1125  %q = load float, float* %ptr_b
1126  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1127  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1128  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1129  ret <8 x float> %res
1130}
1131
1132define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1133; CHECK-LABEL: test_mask_or_ps_rmbkz_256:
1134; CHECK:       ## BB#0:
1135; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1136; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
1137; CHECK-NEXT:    retq ## encoding: [0xc3]
1138  %q = load float, float* %ptr_b
1139  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1140  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1141  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1142  ret <8 x float> %res
1143}
1144
1145declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1146
1147define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1148; CHECK-LABEL: test_mask_or_ps_rr_512:
1149; CHECK:       ## BB#0:
1150; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
1151; CHECK-NEXT:    retq ## encoding: [0xc3]
1152  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1153  ret <16 x float> %res
1154}
1155
1156define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1157; CHECK-LABEL: test_mask_or_ps_rrk_512:
1158; CHECK:       ## BB#0:
1159; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1160; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
1161; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1162; CHECK-NEXT:    retq ## encoding: [0xc3]
1163  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1164  ret <16 x float> %res
1165}
1166
1167define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1168; CHECK-LABEL: test_mask_or_ps_rrkz_512:
1169; CHECK:       ## BB#0:
1170; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1171; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
1172; CHECK-NEXT:    retq ## encoding: [0xc3]
1173  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1174  ret <16 x float> %res
1175}
1176
1177define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1178; CHECK-LABEL: test_mask_or_ps_rm_512:
1179; CHECK:       ## BB#0:
1180; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
1181; CHECK-NEXT:    retq ## encoding: [0xc3]
1182  %b = load <16 x float>, <16 x float>* %ptr_b
1183  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1184  ret <16 x float> %res
1185}
1186
1187define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1188; CHECK-LABEL: test_mask_or_ps_rmk_512:
1189; CHECK:       ## BB#0:
1190; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1191; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
1192; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1193; CHECK-NEXT:    retq ## encoding: [0xc3]
1194  %b = load <16 x float>, <16 x float>* %ptr_b
1195  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1196  ret <16 x float> %res
1197}
1198
1199define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1200; CHECK-LABEL: test_mask_or_ps_rmkz_512:
1201; CHECK:       ## BB#0:
1202; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1203; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
1204; CHECK-NEXT:    retq ## encoding: [0xc3]
1205  %b = load <16 x float>, <16 x float>* %ptr_b
1206  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1207  ret <16 x float> %res
1208}
1209
1210define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1211; CHECK-LABEL: test_mask_or_ps_rmb_512:
1212; CHECK:       ## BB#0:
1213; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
1214; CHECK-NEXT:    retq ## encoding: [0xc3]
1215  %q = load float, float* %ptr_b
1216  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1217  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1218  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1219  ret <16 x float> %res
1220}
1221
1222define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1223; CHECK-LABEL: test_mask_or_ps_rmbk_512:
1224; CHECK:       ## BB#0:
1225; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1226; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
1227; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1228; CHECK-NEXT:    retq ## encoding: [0xc3]
1229  %q = load float, float* %ptr_b
1230  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1231  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1232  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1233  ret <16 x float> %res
1234}
1235
1236define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1237; CHECK-LABEL: test_mask_or_ps_rmbkz_512:
1238; CHECK:       ## BB#0:
1239; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1240; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
1241; CHECK-NEXT:    retq ## encoding: [0xc3]
1242  %q = load float, float* %ptr_b
1243  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1244  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1245  %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1246  ret <16 x float> %res
1247}
1248
1249declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1250
1251define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
1252; CHECK-LABEL: test_mask_xor_ps_rr_128:
1253; CHECK:       ## BB#0:
1254; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1]
1255; CHECK-NEXT:    retq ## encoding: [0xc3]
1256  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1257  ret <4 x float> %res
1258}
1259
1260define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
1261; CHECK-LABEL: test_mask_xor_ps_rrk_128:
1262; CHECK:       ## BB#0:
1263; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1264; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
1265; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1266; CHECK-NEXT:    retq ## encoding: [0xc3]
1267  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1268  ret <4 x float> %res
1269}
1270
1271define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
1272; CHECK-LABEL: test_mask_xor_ps_rrkz_128:
1273; CHECK:       ## BB#0:
1274; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1275; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
1276; CHECK-NEXT:    retq ## encoding: [0xc3]
1277  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1278  ret <4 x float> %res
1279}
1280
1281define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
1282; CHECK-LABEL: test_mask_xor_ps_rm_128:
1283; CHECK:       ## BB#0:
1284; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0x07]
1285; CHECK-NEXT:    retq ## encoding: [0xc3]
1286  %b = load <4 x float>, <4 x float>* %ptr_b
1287  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1288  ret <4 x float> %res
1289}
1290
1291define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
1292; CHECK-LABEL: test_mask_xor_ps_rmk_128:
1293; CHECK:       ## BB#0:
1294; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1295; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
1296; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1297; CHECK-NEXT:    retq ## encoding: [0xc3]
1298  %b = load <4 x float>, <4 x float>* %ptr_b
1299  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1300  ret <4 x float> %res
1301}
1302
1303define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
1304; CHECK-LABEL: test_mask_xor_ps_rmkz_128:
1305; CHECK:       ## BB#0:
1306; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1307; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
1308; CHECK-NEXT:    retq ## encoding: [0xc3]
1309  %b = load <4 x float>, <4 x float>* %ptr_b
1310  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1311  ret <4 x float> %res
1312}
1313
1314define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
1315; CHECK-LABEL: test_mask_xor_ps_rmb_128:
1316; CHECK:       ## BB#0:
1317; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
1318; CHECK-NEXT:    retq ## encoding: [0xc3]
1319  %q = load float, float* %ptr_b
1320  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1321  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1322  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
1323  ret <4 x float> %res
1324}
1325
1326define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
1327; CHECK-LABEL: test_mask_xor_ps_rmbk_128:
1328; CHECK:       ## BB#0:
1329; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1330; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
1331; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1332; CHECK-NEXT:    retq ## encoding: [0xc3]
1333  %q = load float, float* %ptr_b
1334  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1335  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1336  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
1337  ret <4 x float> %res
1338}
1339
1340define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
1341; CHECK-LABEL: test_mask_xor_ps_rmbkz_128:
1342; CHECK:       ## BB#0:
1343; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1344; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
1345; CHECK-NEXT:    retq ## encoding: [0xc3]
1346  %q = load float, float* %ptr_b
1347  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
1348  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
1349  %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
1350  ret <4 x float> %res
1351}
1352
1353declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1354
1355define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
1356; CHECK-LABEL: test_mask_xor_ps_rr_256:
1357; CHECK:       ## BB#0:
1358; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0xc1]
1359; CHECK-NEXT:    retq ## encoding: [0xc3]
1360  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1361  ret <8 x float> %res
1362}
1363
1364define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
1365; CHECK-LABEL: test_mask_xor_ps_rrk_256:
1366; CHECK:       ## BB#0:
1367; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1368; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
1369; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1370; CHECK-NEXT:    retq ## encoding: [0xc3]
1371  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1372  ret <8 x float> %res
1373}
1374
1375define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
1376; CHECK-LABEL: test_mask_xor_ps_rrkz_256:
1377; CHECK:       ## BB#0:
1378; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1379; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
1380; CHECK-NEXT:    retq ## encoding: [0xc3]
1381  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1382  ret <8 x float> %res
1383}
1384
1385define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
1386; CHECK-LABEL: test_mask_xor_ps_rm_256:
1387; CHECK:       ## BB#0:
1388; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0x07]
1389; CHECK-NEXT:    retq ## encoding: [0xc3]
1390  %b = load <8 x float>, <8 x float>* %ptr_b
1391  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1392  ret <8 x float> %res
1393}
1394
1395define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
1396; CHECK-LABEL: test_mask_xor_ps_rmk_256:
1397; CHECK:       ## BB#0:
1398; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1399; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
1400; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1401; CHECK-NEXT:    retq ## encoding: [0xc3]
1402  %b = load <8 x float>, <8 x float>* %ptr_b
1403  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1404  ret <8 x float> %res
1405}
1406
1407define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
1408; CHECK-LABEL: test_mask_xor_ps_rmkz_256:
1409; CHECK:       ## BB#0:
1410; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1411; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
1412; CHECK-NEXT:    retq ## encoding: [0xc3]
1413  %b = load <8 x float>, <8 x float>* %ptr_b
1414  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1415  ret <8 x float> %res
1416}
1417
1418define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
1419; CHECK-LABEL: test_mask_xor_ps_rmb_256:
1420; CHECK:       ## BB#0:
1421; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
1422; CHECK-NEXT:    retq ## encoding: [0xc3]
1423  %q = load float, float* %ptr_b
1424  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1425  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1426  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
1427  ret <8 x float> %res
1428}
1429
1430define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
1431; CHECK-LABEL: test_mask_xor_ps_rmbk_256:
1432; CHECK:       ## BB#0:
1433; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1434; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
1435; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1436; CHECK-NEXT:    retq ## encoding: [0xc3]
1437  %q = load float, float* %ptr_b
1438  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1439  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1440  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
1441  ret <8 x float> %res
1442}
1443
1444define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
1445; CHECK-LABEL: test_mask_xor_ps_rmbkz_256:
1446; CHECK:       ## BB#0:
1447; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
1448; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
1449; CHECK-NEXT:    retq ## encoding: [0xc3]
1450  %q = load float, float* %ptr_b
1451  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
1452  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
1453  %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
1454  ret <8 x float> %res
1455}
1456
1457declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1458
1459define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
1460; CHECK-LABEL: test_mask_xor_ps_rr_512:
1461; CHECK:       ## BB#0:
1462; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
1463; CHECK-NEXT:    retq ## encoding: [0xc3]
1464  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1465  ret <16 x float> %res
1466}
1467
1468define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
1469; CHECK-LABEL: test_mask_xor_ps_rrk_512:
1470; CHECK:       ## BB#0:
1471; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1472; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
1473; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1474; CHECK-NEXT:    retq ## encoding: [0xc3]
1475  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1476  ret <16 x float> %res
1477}
1478
1479define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
1480; CHECK-LABEL: test_mask_xor_ps_rrkz_512:
1481; CHECK:       ## BB#0:
1482; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1483; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
1484; CHECK-NEXT:    retq ## encoding: [0xc3]
1485  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1486  ret <16 x float> %res
1487}
1488
1489define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
1490; CHECK-LABEL: test_mask_xor_ps_rm_512:
1491; CHECK:       ## BB#0:
1492; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
1493; CHECK-NEXT:    retq ## encoding: [0xc3]
1494  %b = load <16 x float>, <16 x float>* %ptr_b
1495  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1496  ret <16 x float> %res
1497}
1498
1499define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
1500; CHECK-LABEL: test_mask_xor_ps_rmk_512:
1501; CHECK:       ## BB#0:
1502; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1503; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
1504; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1505; CHECK-NEXT:    retq ## encoding: [0xc3]
1506  %b = load <16 x float>, <16 x float>* %ptr_b
1507  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1508  ret <16 x float> %res
1509}
1510
1511define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
1512; CHECK-LABEL: test_mask_xor_ps_rmkz_512:
1513; CHECK:       ## BB#0:
1514; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1515; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
1516; CHECK-NEXT:    retq ## encoding: [0xc3]
1517  %b = load <16 x float>, <16 x float>* %ptr_b
1518  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1519  ret <16 x float> %res
1520}
1521
1522define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
1523; CHECK-LABEL: test_mask_xor_ps_rmb_512:
1524; CHECK:       ## BB#0:
1525; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
1526; CHECK-NEXT:    retq ## encoding: [0xc3]
1527  %q = load float, float* %ptr_b
1528  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1529  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1530  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
1531  ret <16 x float> %res
1532}
1533
1534define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
1535; CHECK-LABEL: test_mask_xor_ps_rmbk_512:
1536; CHECK:       ## BB#0:
1537; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1538; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
1539; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1540; CHECK-NEXT:    retq ## encoding: [0xc3]
1541  %q = load float, float* %ptr_b
1542  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1543  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1544  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
1545  ret <16 x float> %res
1546}
1547
1548define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
1549; CHECK-LABEL: test_mask_xor_ps_rmbkz_512:
1550; CHECK:       ## BB#0:
1551; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1552; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
1553; CHECK-NEXT:    retq ## encoding: [0xc3]
1554  %q = load float, float* %ptr_b
1555  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
1556  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
1557  %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
1558  ret <16 x float> %res
1559}
1560
1561declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1562
1563declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
1564
1565define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
1566; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
1567; CHECK:       ## BB#0:
1568; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1569; CHECK-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
1570; CHECK-NEXT:    vcvtpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
1571; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1572; CHECK-NEXT:    retq ## encoding: [0xc3]
1573  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
1574  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
1575  %res2 = add <2 x i64> %res, %res1
1576  ret <2 x i64> %res2
1577}
1578
1579declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)
1580
1581define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
1582; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
1583; CHECK:       ## BB#0:
1584; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1585; CHECK-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
1586; CHECK-NEXT:    vcvtpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
1587; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1588; CHECK-NEXT:    retq ## encoding: [0xc3]
1589  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
1590  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
1591  %res2 = add <4 x i64> %res, %res1
1592  ret <4 x i64> %res2
1593}
1594
1595declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)
1596
1597define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
1598; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
1599; CHECK:       ## BB#0:
1600; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1601; CHECK-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
1602; CHECK-NEXT:    vcvtpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
1603; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1604; CHECK-NEXT:    retq ## encoding: [0xc3]
1605  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
1606  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
1607  %res2 = add <2 x i64> %res, %res1
1608  ret <2 x i64> %res2
1609}
1610
1611declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)
1612
1613define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
1614; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
1615; CHECK:       ## BB#0:
1616; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1617; CHECK-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
1618; CHECK-NEXT:    vcvtpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
1619; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1620; CHECK-NEXT:    retq ## encoding: [0xc3]
1621  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
1622  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
1623  %res2 = add <4 x i64> %res, %res1
1624  ret <4 x i64> %res2
1625}
1626
1627declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)
1628
1629define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
1630; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
1631; CHECK:       ## BB#0:
1632; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1633; CHECK-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
1634; CHECK-NEXT:    vcvtps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
1635; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1636; CHECK-NEXT:    retq ## encoding: [0xc3]
1637  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
1638  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
1639  %res2 = add <2 x i64> %res, %res1
1640  ret <2 x i64> %res2
1641}
1642
1643declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)
1644
1645define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
1646; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
1647; CHECK:       ## BB#0:
1648; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1649; CHECK-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
1650; CHECK-NEXT:    vcvtps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
1651; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1652; CHECK-NEXT:    retq ## encoding: [0xc3]
1653  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
1654  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
1655  %res2 = add <4 x i64> %res, %res1
1656  ret <4 x i64> %res2
1657}
1658
1659declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)
1660
1661define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
1662; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
1663; CHECK:       ## BB#0:
1664; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1665; CHECK-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
1666; CHECK-NEXT:    vcvtps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
1667; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1668; CHECK-NEXT:    retq ## encoding: [0xc3]
1669  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
1670  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
1671  %res2 = add <2 x i64> %res, %res1
1672  ret <2 x i64> %res2
1673}
1674
1675declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)
1676
1677define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
1678; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
1679; CHECK:       ## BB#0:
1680; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1681; CHECK-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
1682; CHECK-NEXT:    vcvtps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
1683; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1684; CHECK-NEXT:    retq ## encoding: [0xc3]
1685  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
1686  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
1687  %res2 = add <4 x i64> %res, %res1
1688  ret <4 x i64> %res2
1689}
1690
1691declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)
1692
1693define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
1694; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
1695; CHECK:       ## BB#0:
1696; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1697; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
1698; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0]
1699; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
1700; CHECK-NEXT:    retq ## encoding: [0xc3]
1701  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
1702  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
1703  %res2 = fadd <2 x double> %res, %res1
1704  ret <2 x double> %res2
1705}
1706
1707declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)
1708
1709define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
1710; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
1711; CHECK:       ## BB#0:
1712; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1713; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
1714; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0]
1715; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
1716; CHECK-NEXT:    retq ## encoding: [0xc3]
1717  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
1718  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
1719  %res2 = fadd <4 x double> %res, %res1
1720  ret <4 x double> %res2
1721}
1722
1723declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)
1724
1725define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
1726; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
1727; CHECK:       ## BB#0:
1728; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1729; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
1730; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
1731; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
1732; CHECK-NEXT:    retq ## encoding: [0xc3]
1733  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
1734  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
1735  %res2 = fadd <4 x float> %res, %res1
1736  ret <4 x float> %res2
1737}
1738
1739declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
1740
1741define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
1742; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
1743; CHECK:       ## BB#0:
1744; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1745; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
1746; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
1747; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
1748; CHECK-NEXT:    retq ## encoding: [0xc3]
1749  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
1750  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
1751  %res2 = fadd <4 x float> %res, %res1
1752  ret <4 x float> %res2
1753}
1754
1755declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
1756
1757define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
1758; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
1759; CHECK:       ## BB#0:
1760; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1761; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
1762; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
1763; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1764; CHECK-NEXT:    retq ## encoding: [0xc3]
1765  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
1766  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
1767  %res2 = add <2 x i64> %res, %res1
1768  ret <2 x i64> %res2
1769}
1770
1771declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
1772
1773define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
1774; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
1775; CHECK:       ## BB#0:
1776; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1777; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
1778; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
1779; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1780; CHECK-NEXT:    retq ## encoding: [0xc3]
1781  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
1782  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
1783  %res2 = add <4 x i64> %res, %res1
1784  ret <4 x i64> %res2
1785}
1786
1787declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
1788
1789define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
1790; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
1791; CHECK:       ## BB#0:
1792; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1793; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
1794; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
1795; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1796; CHECK-NEXT:    retq ## encoding: [0xc3]
1797  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
1798  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
1799  %res2 = add <2 x i64> %res, %res1
1800  ret <2 x i64> %res2
1801}
1802
1803declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
1804
1805define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
1806; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
1807; CHECK:       ## BB#0:
1808; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1809; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
1810; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
1811; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1812; CHECK-NEXT:    retq ## encoding: [0xc3]
1813  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
1814  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
1815  %res2 = add <4 x i64> %res, %res1
1816  ret <4 x i64> %res2
1817}
1818
1819declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
1820
1821define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
1822; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
1823; CHECK:       ## BB#0:
1824; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1825; CHECK-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
1826; CHECK-NEXT:    vcvttps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
1827; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1828; CHECK-NEXT:    retq ## encoding: [0xc3]
1829  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
1830  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
1831  %res2 = add <2 x i64> %res, %res1
1832  ret <2 x i64> %res2
1833}
1834
1835declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
1836
1837define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
1838; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
1839; CHECK:       ## BB#0:
1840; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1841; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
1842; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
1843; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1844; CHECK-NEXT:    retq ## encoding: [0xc3]
1845  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
1846  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
1847  %res2 = add <4 x i64> %res, %res1
1848  ret <4 x i64> %res2
1849}
1850
1851declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)
1852
1853define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
1854; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
1855; CHECK:       ## BB#0:
1856; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1857; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
1858; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
1859; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
1860; CHECK-NEXT:    retq ## encoding: [0xc3]
1861  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
1862  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
1863  %res2 = fadd <2 x double> %res, %res1
1864  ret <2 x double> %res2
1865}
1866
1867declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)
1868
1869define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
1870; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
1871; CHECK:       ## BB#0:
1872; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1873; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
1874; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
1875; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
1876; CHECK-NEXT:    retq ## encoding: [0xc3]
1877  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
1878  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
1879  %res2 = fadd <4 x double> %res, %res1
1880  ret <4 x double> %res2
1881}
1882
1883declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)
1884
1885define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
1886; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
1887; CHECK:       ## BB#0:
1888; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1889; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
1890; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
1891; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
1892; CHECK-NEXT:    retq ## encoding: [0xc3]
1893  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
1894  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
1895  %res2 = fadd <4 x float> %res, %res1
1896  ret <4 x float> %res2
1897}
1898
1899declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
1900
1901define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
1902; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
1903; CHECK:       ## BB#0:
1904; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1905; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
1906; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
1907; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
1908; CHECK-NEXT:    retq ## encoding: [0xc3]
1909  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
1910  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
1911  %res2 = fadd <4 x float> %res, %res1
1912  ret <4 x float> %res2
1913}
1914
1915declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)
1916
1917define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
1918; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
1919; CHECK:       ## BB#0:
1920; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1921; CHECK-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
1922; CHECK-NEXT:    vcvttps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
1923; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
1924; CHECK-NEXT:    retq ## encoding: [0xc3]
1925  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
1926  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
1927  %res2 = add <2 x i64> %res, %res1
1928  ret <2 x i64> %res2
1929}
1930
1931declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
1932
1933define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
1934; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
1935; CHECK:       ## BB#0:
1936; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1937; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
1938; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
1939; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
1940; CHECK-NEXT:    retq ## encoding: [0xc3]
1941  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
1942  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
1943  %res2 = add <4 x i64> %res, %res1
1944  ret <4 x i64> %res2
1945}
1946
1947declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
1948
1949define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
1950; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
1951; CHECK:       ## BB#0:
1952; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1953; CHECK-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
1954; CHECK-NEXT:    vreducepd $8, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
1955; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
1956; CHECK-NEXT:    retq ## encoding: [0xc3]
1957  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
1958  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
1959  %res2 = fadd <2 x double> %res, %res1
1960  ret <2 x double> %res2
1961}
1962
1963declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
1964
1965define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
1966; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
1967; CHECK:       ## BB#0:
1968; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1969; CHECK-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
1970; CHECK-NEXT:    vreducepd $0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
1971; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
1972; CHECK-NEXT:    retq ## encoding: [0xc3]
1973  %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
1974  %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
1975  %res2 = fadd <4 x double> %res, %res1
1976  ret <4 x double> %res2
1977}
1978
1979declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
1980
1981define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
1982; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
1983; CHECK:       ## BB#0:
1984; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
1985; CHECK-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
1986; CHECK-NEXT:    vreduceps $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
1987; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
1988; CHECK-NEXT:    retq ## encoding: [0xc3]
1989  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
1990  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
1991  %res2 = fadd <4 x float> %res, %res1
1992  ret <4 x float> %res2
1993}
1994
1995declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
1996
1997define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
1998; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
1999; CHECK:       ## BB#0:
2000; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2001; CHECK-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
2002; CHECK-NEXT:    vreduceps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
2003; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
2004; CHECK-NEXT:    retq ## encoding: [0xc3]
2005  %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
2006  %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
2007  %res2 = fadd <8 x float> %res, %res1
2008  ret <8 x float> %res2
2009}
2010
2011declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
2012
2013define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
2014; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_128:
2015; CHECK:       ## BB#0:
2016; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2017; CHECK-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
2018; CHECK-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
2019; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
2020; CHECK-NEXT:    retq ## encoding: [0xc3]
2021  %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
2022  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
2023  %res2 = fadd <2 x double> %res, %res1
2024  ret <2 x double> %res2
2025}
2026
2027declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
2028
2029define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
2030; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_256:
2031; CHECK:       ## BB#0:
2032; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2033; CHECK-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
2034; CHECK-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
2035; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
2036; CHECK-NEXT:    retq ## encoding: [0xc3]
2037  %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
2038  %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
2039  %res2 = fadd <4 x double> %res, %res1
2040  ret <4 x double> %res2
2041}
2042
2043declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
2044
2045define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
2046; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_128:
2047; CHECK:       ## BB#0:
2048; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2049; CHECK-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
2050; CHECK-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
2051; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
2052; CHECK-NEXT:    retq ## encoding: [0xc3]
2053  %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
2054  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
2055  %res2 = fadd <4 x float> %res, %res1
2056  ret <4 x float> %res2
2057}
2058
2059declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
2060
2061define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
2062; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_256:
2063; CHECK:       ## BB#0:
2064; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2065; CHECK-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
2066; CHECK-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
2067; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
2068; CHECK-NEXT:    retq ## encoding: [0xc3]
2069  %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
2070  %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
2071  %res2 = fadd <8 x float> %res, %res1
2072  ret <8 x float> %res2
2073}
2074
2075declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
2076
2077define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
2078; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
2079; CHECK:       ## BB#0:
2080; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2081; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
2082; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc2,0x01]
2083; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc0,0x01]
2084; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
2085; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
2086; CHECK-NEXT:    retq ## encoding: [0xc3]
2087  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
2088  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
2089  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
2090  %res3 = fadd <2 x double> %res, %res1
2091  %res4 = fadd <2 x double> %res3, %res2
2092  ret <2 x double> %res4
2093}
2094
2095declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
2096
2097define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
2098; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
2099; CHECK:       ## BB#0:
2100; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2101; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
2102; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xd9,0x01]
2103; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xc1,0x01]
2104; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
2105; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
2106; CHECK-NEXT:    retq ## encoding: [0xc3]
2107  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
2108  %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
2109  %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
2110  %res3 = fadd <4 x double> %res, %res1
2111  %res4 = fadd <4 x double> %res2, %res3
2112  ret <4 x double> %res4
2113}
2114
2115declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
2116
2117define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
2118; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
2119; CHECK:       ## BB#0:
2120; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2121; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
2122; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xd9,0x01]
2123; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xc1,0x01]
2124; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2125; CHECK-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3]
2126; CHECK-NEXT:    retq ## encoding: [0xc3]
2127  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
2128  %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
2129  %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
2130  %res3 = add <4 x i64> %res, %res1
2131  %res4 = add <4 x i64> %res3, %res2
2132  ret <4 x i64> %res4
2133}
2134
2135declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
2136
2137define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
2138; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
2139; CHECK:       ## BB#0:
2140; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2141; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
2142; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
2143; CHECK-NEXT:    vfpclassps $4, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc0,0x04]
2144; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2145; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
2146; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2147; CHECK-NEXT:    retq ## encoding: [0xc3]
2148  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
2149  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
2150  %res2 = add i8 %res, %res1
2151  ret i8 %res2
2152}
2153
2154declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
2155
2156define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
2157; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
2158; CHECK:       ## BB#0:
2159; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2160; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
2161; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
2162; CHECK-NEXT:    vfpclassps $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc0,0x04]
2163; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2164; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
2165; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2166; CHECK-NEXT:    retq ## encoding: [0xc3]
2167  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
2168  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
2169  %res2 = add i8 %res, %res1
2170  ret i8 %res2
2171}
2172
2173declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
2174
2175define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
2176; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
2177; CHECK:       ## BB#0:
2178; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2179; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
2180; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
2181; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc0,0x02]
2182; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2183; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
2184; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2185; CHECK-NEXT:    retq ## encoding: [0xc3]
2186  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
2187  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
2188  %res2 = add i8 %res, %res1
2189  ret i8 %res2
2190}
2191
2192declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
2193
2194define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
2195; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
2196; CHECK:       ## BB#0:
2197; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2198; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
2199; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
2200; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc0,0x04]
2201; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2202; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
2203; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2204; CHECK-NEXT:    retq ## encoding: [0xc3]
2205  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
2206  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
2207  %res2 = add i8 %res, %res1
2208  ret i8 %res2
2209}
2210
2211declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
2212
2213define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
2214; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
2215; CHECK:       ## BB#0:
2216; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2217; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x19,0xc8]
2218; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
2219; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x19,0xc0]
2220; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
2221; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
2222; CHECK-NEXT:    retq ## encoding: [0xc3]
2223  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>  %x0, <8 x float> %x2, i8 %x3)
2224  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
2225  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
2226  %res3 = fadd <8 x float> %res, %res1
2227  %res4 = fadd <8 x float> %res3, %res2
2228  ret <8 x float> %res4
2229}
2230
2231declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
2232
2233define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
2234; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
2235; CHECK:       ## BB#0:
2236; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2237; CHECK-NEXT:    vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
2238; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
2239; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0]
2240; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
2241; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
2242; CHECK-NEXT:    retq ## encoding: [0xc3]
2243  %y_64  = load i64, i64 * %y_ptr
2244  %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
2245  %y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
2246  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>  %y, <8 x i32> %x2, i8 %x3)
2247  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
2248  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
2249  %res3 = add <8 x i32> %res, %res1
2250  %res4 = add <8 x i32> %res3, %res2
2251  ret <8 x i32> %res4
2252}
2253
2254declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)
2255
2256define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
2257; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
2258; CHECK:       ## BB#0:
2259; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2260; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8]
2261; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0]
2262; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x59,0xc0]
2263; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
2264; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
2265; CHECK-NEXT:    retq ## encoding: [0xc3]
2266  %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>  %x0, <4 x i32> %x2, i8 %x3)
2267  %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
2268  %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
2269  %res3 = add <4 x i32> %res, %res1
2270  %res4 = add <4 x i32> %res3, %res2
2271  ret <4 x i32> %res4
2272}
2273
2274declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
2275
2276define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
2277; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
2278; CHECK:       ## BB#0:
2279; CHECK-NEXT:    vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
2280; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2281; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2282; CHECK-NEXT:    retq ## encoding: [0xc3]
2283    %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
2284    ret i8 %res
2285}
2286
2287declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
2288
2289define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
2290; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
2291; CHECK:       ## BB#0:
2292; CHECK-NEXT:    vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
2293; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2294; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2295; CHECK-NEXT:    retq ## encoding: [0xc3]
2296    %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
2297    ret i8 %res
2298}
2299
2300declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
2301
2302define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
2303; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
2304; CHECK:       ## BB#0:
2305; CHECK-NEXT:    vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
2306; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2307; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2308; CHECK-NEXT:    retq ## encoding: [0xc3]
2309    %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
2310    ret i8 %res
2311}
2312
2313declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
2314
2315define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
2316; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
2317; CHECK:       ## BB#0:
2318; CHECK-NEXT:    vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
2319; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
2320; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
2321; CHECK-NEXT:    retq ## encoding: [0xc3]
2322    %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
2323    ret i8 %res
2324}
2325
2326declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
2327
2328define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
2329; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128:
2330; CHECK:       ## BB#0:
2331; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
2332; CHECK-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
2333; CHECK-NEXT:    retq ## encoding: [0xc3]
2334  %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
2335  ret <4 x i32> %res
2336}
2337
2338declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
2339
2340define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
2341; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256:
2342; CHECK:       ## BB#0:
2343; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
2344; CHECK-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
2345; CHECK-NEXT:    retq ## encoding: [0xc3]
2346  %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
2347  ret <8 x i32> %res
2348}
2349
2350declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
2351
2352define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
2353; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128:
2354; CHECK:       ## BB#0:
2355; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
2356; CHECK-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
2357; CHECK-NEXT:    retq ## encoding: [0xc3]
2358  %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
2359  ret <2 x i64> %res
2360}
2361
2362declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
2363
2364define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
2365; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256:
2366; CHECK:       ## BB#0:
2367; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
2368; CHECK-NEXT:    vpmovm2q %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
2369; CHECK-NEXT:    retq ## encoding: [0xc3]
2370  %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
2371  ret <4 x i64> %res
2372}
2373declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
2374
2375define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
2376; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
2377; CHECK:       ## BB#0:
2378; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
2379; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2380; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
2381; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
2382; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
2383; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,0,1]
2384; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00]
2385; CHECK-NEXT:    ## ymm0 = ymm0[0,1,0,1]
2386; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
2387; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
2388; CHECK-NEXT:    retq ## encoding: [0xc3]
2389
2390  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
2391  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
2392  %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
2393  %res4 = fadd <4 x double> %res1, %res2
2394  %res5 = fadd <4 x double> %res3, %res4
2395  ret <4 x double> %res5
2396}
2397
2398declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
2399
2400define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
2401; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
2402; CHECK:       ## BB#0:
2403; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
2404; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
2405; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
2406; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
2407; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
2408; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,0,1]
2409; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00]
2410; CHECK-NEXT:    ## ymm0 = ymm0[0,1,0,1]
2411; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
2412; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2413; CHECK-NEXT:    retq ## encoding: [0xc3]
2414
2415  %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
2416  %res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
2417  %res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
2418  %res4 = add <4 x i64> %res1, %res2
2419  %res5 = add <4 x i64> %res3, %res4
2420  ret <4 x i64> %res5
2421}
2422