; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2,-avx | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2   | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

; Branches on whether the <4 x i32> argument, viewed as one i128, has any bit
; set; returns 0 when it is nonzero and 1 when it is all zero.
; The checks expect a single ptest/vptest with SSE4.1 and AVX, and a
; pcmpeqb + pmovmskb + cmpl $65535 sequence when only SSE2 is available.
define i32 @veccond128(<4 x i32> %input) {
; SSE2-LABEL: veccond128:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB0_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB0_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond128:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    je .LBB0_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB0_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: veccond128:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    je .LBB0_2
; AVX-NEXT:  # %bb.1: # %if-true-block
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB0_2: # %endif-block
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    retq
entry:
  %0 = bitcast <4 x i32> %input to i128
  %1 = icmp ne i128 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Branches on whether the <8 x i32> argument, viewed as one i256, has any bit
; set; returns 0 when it is nonzero and 1 when it is all zero.
; The SSE checks expect the two xmm halves to be OR'ed together before the
; 128-bit zero test; the AVX checks expect one vptest on ymm0 and a vzeroupper
; ahead of each return.
define i32 @veccond256(<8 x i32> %input) {
; SSE2-LABEL: veccond256:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB1_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB1_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond256:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    je .LBB1_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB1_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: veccond256:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    je .LBB1_2
; AVX-NEXT:  # %bb.1: # %if-true-block
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB1_2: # %endif-block
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
entry:
  %0 = bitcast <8 x i32> %input to i256
  %1 = icmp ne i256 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Branches on whether the <16 x i32> argument, viewed as one i512, has any bit
; set; returns 0 when it is nonzero and 1 when it is all zero.
; The SSE checks expect three por instructions folding xmm0-xmm3 into one
; register before the 128-bit zero test. The AVX1 checks expect the two ymm
; halves to be combined with vorps; the AVX-512 checks expect the upper 256
; bits of zmm0 to be extracted and OR'ed into the lower half. Both then use a
; 256-bit vptest.
define i32 @veccond512(<16 x i32> %input) {
; SSE2-LABEL: veccond512:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    je .LBB2_2
; SSE2-NEXT:  # %bb.1: # %if-true-block
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    retq
; SSE2-NEXT:  .LBB2_2: # %endif-block
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: veccond512:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    je .LBB2_2
; SSE41-NEXT:  # %bb.1: # %if-true-block
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    retq
; SSE41-NEXT:  .LBB2_2: # %endif-block
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: veccond512:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    je .LBB2_2
; AVX1-NEXT:  # %bb.1: # %if-true-block
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
; AVX1-NEXT:  .LBB2_2: # %endif-block
; AVX1-NEXT:    movl $1, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: veccond512:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    je .LBB2_2
; AVX512-NEXT:  # %bb.1: # %if-true-block
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; AVX512-NEXT:  .LBB2_2: # %endif-block
; AVX512-NEXT:    movl $1, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = bitcast <16 x i32> %input to i512
  %1 = icmp ne i512 %0, 0
  br i1 %1, label %if-true-block, label %endif-block
if-true-block:
  ret i32 0
endif-block:
  ret i32 1
}

; Returns 1 when the <4 x i32> argument, viewed as one i128, has any bit set
; and 0 otherwise (the i1 compare result is zero-extended to i32).
; The checks expect the zero test to feed setne directly, with no branch.
define i32 @vectest128(<4 x i32> %input) {
; SSE2-LABEL: vectest128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest128:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: vectest128:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %t0 = bitcast <4 x i32> %input to i128
  %t1 = icmp ne i128 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Returns 1 when the <8 x i32> argument, viewed as one i256, has any bit set
; and 0 otherwise (the i1 compare result is zero-extended to i32).
; The SSE checks expect the two xmm halves to be OR'ed before the zero test;
; the AVX checks expect a single 256-bit vptest followed by vzeroupper.
define i32 @vectest256(<8 x i32> %input) {
; SSE2-LABEL: vectest256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest256:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: vectest256:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %t0 = bitcast <8 x i32> %input to i256
  %t1 = icmp ne i256 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Returns 1 when the <16 x i32> argument, viewed as one i512, has any bit set
; and 0 otherwise (the i1 compare result is zero-extended to i32).
; The SSE checks expect xmm0-xmm3 to be folded together with por. The AVX1
; checks expect the two ymm halves to be combined with vorps; the AVX-512
; checks expect the upper half of zmm0 to be extracted and OR'ed into the
; lower half. In every case a single zero test then feeds setne.
define i32 @vectest512(<16 x i32> %input) {
; SSE2-LABEL: vectest512:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vectest512:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vectest512:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vectest512:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %t0 = bitcast <16 x i32> %input to i512
  %t1 = icmp ne i512 %t0, 0
  %t2 = zext i1 %t1 to i32
  ret i32 %t2
}

; Selects between two scalars based on a whole-vector zero test: returns %a
; when the <4 x i32> argument, viewed as one i128, has any bit set, else %b.
; The checks expect %a to be copied into the result register up front and a
; cmovel to replace it with %b when the zero test reports "all zero".
define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel128:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: vecsel128:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    retq
  %t0 = bitcast <4 x i32> %input to i128
  %t1 = icmp ne i128 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}

; Selects between two scalars based on a whole-vector zero test: returns %a
; when the <8 x i32> argument, viewed as one i256, has any bit set, else %b.
; The SSE checks expect the two xmm halves to be OR'ed before the zero test;
; the AVX checks expect a single 256-bit vptest. In both cases a cmovel picks
; %b when the vector is all zero.
define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel256:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: vecsel256:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %t0 = bitcast <8 x i32> %input to i256
  %t1 = icmp ne i256 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}

; Selects between two scalars based on a whole-vector zero test: returns %a
; when the <16 x i32> argument, viewed as one i512, has any bit set, else %b.
; The SSE checks expect xmm0-xmm3 to be folded together with por. The AVX1
; checks expect the two ymm halves to be combined with vorps; the AVX-512
; checks expect the upper half of zmm0 to be extracted and OR'ed into the
; lower half. A single zero test then drives a cmovel that picks %b when the
; vector is all zero.
define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
; SSE2-LABEL: vecsel512:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
; SSE2-NEXT:    cmovel %esi, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vecsel512:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl %edi, %eax
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    cmovel %esi, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vecsel512:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vecsel512:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %edi, %eax
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    cmovel %esi, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %t0 = bitcast <16 x i32> %input to i512
  %t1 = icmp ne i512 %t0, 0
  %t2 = select i1 %t1, i32 %a, i32 %b
  ret i32 %t2
}
