1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5
6;
7; DemandedBits - MOVMSK zeros the upper bits of the result.
8;
9
; Masking the mmx.pmovmskb result with 255 (the low 8 bits) is expected to be
; a no-op: the expected output keeps only the call and returns it directly.
define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %low8 = and i32 %mask, 255
  ret i32 %low8
}
19
; Masking the sse.movmsk.ps result with 15 (the low 4 bits, one per float
; lane) is expected to be a no-op: only the call remains in the output.
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %low4 = and i32 %mask, 15
  ret i32 %low4
}
29
; Masking the sse2.movmsk.pd result with 3 (the low 2 bits, one per double
; lane) is expected to be a no-op: only the call remains in the output.
define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %low2 = and i32 %mask, 3
  ret i32 %low2
}
39
; Masking the sse2.pmovmskb.128 result with 65535 (the low 16 bits, one per
; byte lane) is expected to be a no-op: only the call remains in the output.
define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %low16 = and i32 %mask, 65535
  ret i32 %low16
}
49
; Masking the avx.movmsk.ps.256 result with 255 (the low 8 bits, one per
; float lane) is expected to be a no-op: only the call remains in the output.
define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %low8 = and i32 %mask, 255
  ret i32 %low8
}
59
; Masking the avx.movmsk.pd.256 result with 15 (the low 4 bits, one per
; double lane) is expected to be a no-op: only the call remains in the output.
define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %mask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %low4 = and i32 %mask, 15
  ret i32 %low4
}
69
; No upper-bits test for llvm.x86.avx2.pmovmskb: it takes <32 x i8>, so its result uses the whole of the 32-bit register.
71
72;
73; DemandedBits - If we don't use the lower bits then we just return zero.
74;
75
; The mask -256 discards the low 8 bits of the mmx.pmovmskb result; the
; expected output is the constant 0 with no call left.
define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %high = and i32 %mask, -256
  ret i32 %high
}
84
; The mask -16 discards the low 4 bits of the sse.movmsk.ps result; the
; expected output is the constant 0 with no call left.
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %high = and i32 %mask, -16
  ret i32 %high
}
93
; The mask -4 discards the low 2 bits of the sse2.movmsk.pd result; the
; expected output is the constant 0 with no call left.
define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %high = and i32 %mask, -4
  ret i32 %high
}
102
; The mask -65536 discards the low 16 bits of the sse2.pmovmskb.128 result;
; the expected output is the constant 0 with no call left.
define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %high = and i32 %mask, -65536
  ret i32 %high
}
111
; The mask -256 discards the low 8 bits of the avx.movmsk.ps.256 result; the
; expected output is the constant 0 with no call left.
define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %high = and i32 %mask, -256
  ret i32 %high
}
120
; The mask -16 discards the low 4 bits of the avx.movmsk.pd.256 result; the
; expected output is the constant 0 with no call left.
define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %high = and i32 %mask, -16
  ret i32 %high
}
129
; No lower-bits test for llvm.x86.avx2.pmovmskb: it takes <32 x i8>, so its result uses the whole of the 32-bit register.
131
132;
133; Constant Folding (UNDEF -> ZERO)
134;
135
; mmx.pmovmskb applied to an undef operand: the expected output is the
; constant 0.
define i32 @undef_x86_mmx_pmovmskb() {
; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
  ret i32 %mask
}
143
; sse.movmsk.ps applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_sse_movmsk_ps() {
; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
  ret i32 %mask
}
151
; sse2.movmsk.pd applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
  ret i32 %mask
}
159
; sse2.pmovmskb.128 applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
  ret i32 %mask
}
167
; avx.movmsk.ps.256 applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
  ret i32 %mask
}
175
; avx.movmsk.pd.256 applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
  ret i32 %mask
}
183
; avx2.pmovmskb applied to an undef vector: the expected output is the
; constant 0.
define i32 @undef_x86_avx2_pmovmskb() {
; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
  ret i32 %mask
}
191
192;
193; Constant Folding (ZERO -> ZERO)
194;
195
; mmx.pmovmskb applied to an all-zero x86_mmx value. Unlike the vector
; variants in this section, the expected output still contains the call: the
; zero operand shows up as a bitcast constant expression and nothing is folded.
define i32 @zero_x86_mmx_pmovmskb() {
; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %zero = bitcast <1 x i64> zeroinitializer to x86_mmx
  %mask = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %zero)
  ret i32 %mask
}
205
; sse.movmsk.ps applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_sse_movmsk_ps() {
; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
  ret i32 %mask
}
213
; sse2.movmsk.pd applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
  ret i32 %mask
}
221
; sse2.pmovmskb.128 applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
  ret i32 %mask
}
229
; avx.movmsk.ps.256 applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
  ret i32 %mask
}
237
; avx.movmsk.pd.256 applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
  ret i32 %mask
}
245
; avx2.pmovmskb applied to an all-zero vector: the expected output is the
; constant 0.
define i32 @zero_x86_avx2_pmovmskb() {
; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %mask = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
  ret i32 %mask
}
253
254;
255; Constant Folding
256;
257
; mmx.pmovmskb applied to a constant <8 x i8> bitcast to x86_mmx. The expected
; output still contains the call (with the operand as a bitcast constant
; expression); no constant result is produced for the MMX variant.
; Every element is written as an in-range i8 literal (-1 and 0), which is
; exactly how the operand appears in the expected output, so the input does
; not depend on the IR parser truncating wider literals.
define i32 @fold_x86_mmx_pmovmskb() {
; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[MASK:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
; CHECK-NEXT:    ret i32 [[MASK]]
;
  %bytes = bitcast <8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx
  %mask = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %bytes)
  ret i32 %mask
}
267
; sse.movmsk.ps on a constant vector whose lanes 1 and 3 are negative; the
; expected output is the constant 10 (0b1010).
define i32 @fold_x86_sse_movmsk_ps() {
; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 10
;
  %mask = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+02, float -2.000000e+02>)
  ret i32 %mask
}
275
; sse2.movmsk.pd on a constant vector whose lane 1 is negative; the expected
; output is the constant 2 (0b10).
define i32 @fold_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 2
;
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.000000e+00, double -1.000000e+00>)
  ret i32 %mask
}
283
; sse2.pmovmskb.128 on a constant vector made of the same 8-byte pattern
; twice. In each half the bytes at indices 1, 2 and 4 are negative, giving
; 0x16 per half; the expected output is the constant 5654 (0x1616).
; Every element is written as an in-range i8 literal (-1 for 0xFF, 0 for the
; wrapped value) so the input does not depend on the IR parser truncating
; wider literals.
define i32 @fold_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 5654
;
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0, i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0>)
  ret i32 %mask
}
291
; avx.movmsk.ps.256 on a constant vector whose lanes 1, 3, 5 and 7 carry a
; negative sign (lane 5 is -0.0); the expected output is the constant 170
; (0b10101010).
define i32 @fold_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 170
;
  %mask = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.000000e+00, float -1.000000e+00, float 1.000000e+02, float -2.000000e+02, float 0.000000e+00, float -0.000000e+00, float 1.000000e+05, float -5.000000e+06>)
  ret i32 %mask
}
299
; avx.movmsk.pd.256 on a constant vector whose lanes 1 and 3 are negative;
; the expected output is the constant 10 (0b1010).
define i32 @fold_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 10
;
  %mask = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.000000e+00, double -1.000000e+00, double 1.000000e+02, double -2.000000e+02>)
  ret i32 %mask
}
307
; avx2.pmovmskb on a constant <32 x i8>. The first 8 bytes are 0 followed by
; seven undef lanes, which contribute no set bits to the expected value. The
; remaining three 8-byte groups repeat a pattern whose bytes at indices 1, 2
; and 4 are negative (0x16 per group). The expected output is the constant
; 370546176 (0x16161600).
; Every defined element is written as an in-range i8 literal (-1 for 0xFF,
; 0 for the wrapped value) so the input does not depend on the IR parser
; truncating wider literals.
define i32 @fold_x86_avx2_pmovmskb() {
; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 370546176
;
  %mask = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0, i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0, i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0>)
  ret i32 %mask
}
315
316declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
317
318declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
319declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
320declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
321
322declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
323declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
324declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)
325