; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}

; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    psrlw $15, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE42-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}

; PR26697
define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
; SSE-LABEL: cmpeq_one_mask_bit:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_one_mask_bit:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
  %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
  ret <4 x i32> %mask_bool_ext
}
