1; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
2; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV
3
; NOTE(review): "m:o" in this layout string requests Mach-O name mangling,
; while both run lines above use Linux triples, and the same string is applied
; to both the x86_64 and the i686 run. Confirm this is intentional (or that
; llc derives the effective layout from the triple).
4target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5
6; Test 2xCMOV patterns exposed after legalization.
7; One way to do that is with (select (fcmp une/oeq)), which gets
8; legalized to setp/setne.
9
10; CHECK-LABEL: test_select_fcmp_oeq_i32:
11
12; CMOV-NEXT: ucomiss  %xmm1, %xmm0
13; CMOV-NEXT: cmovnel  %esi, %edi
14; CMOV-NEXT: cmovpl  %esi, %edi
15; CMOV-NEXT: movl  %edi, %eax
16; CMOV-NEXT: retq
17
18; NOCMOV-NEXT:  flds  8(%esp)
19; NOCMOV-NEXT:  flds  4(%esp)
20; NOCMOV-NEXT:  fucompp
21; NOCMOV-NEXT:  fnstsw  %ax
22; NOCMOV-NEXT:  sahf
23; NOCMOV-NEXT:  leal  16(%esp), %eax
24; NOCMOV-NEXT:  jne  [[TBB:.LBB[0-9_]+]]
25; NOCMOV-NEXT:  jp  [[TBB]]
26; NOCMOV-NEXT:  leal  12(%esp), %eax
27; NOCMOV-NEXT:[[TBB]]:
28; NOCMOV-NEXT:  movl  (%eax), %eax
29; NOCMOV-NEXT:  retl
; Selects between two i32 arguments on an ordered-equal float compare.
; The expected output above needs two conditions after the compare ("ne" and
; "p"), because oeq must reject both the not-equal and the unordered outcome:
; two conditional moves on x86-64, a jne/jp pair on i686.
30define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
31entry:
32  %cmp = fcmp oeq float %a, %b            ; true only if ordered and equal
33  %r = select i1 %cmp, i32 %c, i32 %d     ; %c when %cmp holds, otherwise %d
34  ret i32 %r
35}
36
37; CHECK-LABEL: test_select_fcmp_oeq_i64:
38
39; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
40; CMOV-NEXT:   cmovneq  %rsi, %rdi
41; CMOV-NEXT:   cmovpq  %rsi, %rdi
42; CMOV-NEXT:   movq  %rdi, %rax
43; CMOV-NEXT:   retq
44
45; NOCMOV-NEXT:   flds  8(%esp)
46; NOCMOV-NEXT:   flds  4(%esp)
47; NOCMOV-NEXT:   fucompp
48; NOCMOV-NEXT:   fnstsw  %ax
49; NOCMOV-NEXT:   sahf
50; NOCMOV-NEXT:   leal  20(%esp), %ecx
51; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
52; NOCMOV-NEXT:   jp  [[TBB]]
53; NOCMOV-NEXT:   leal  12(%esp), %ecx
54; NOCMOV-NEXT: [[TBB]]:
55; NOCMOV-NEXT:   movl  (%ecx), %eax
56; NOCMOV-NEXT:   orl  $4, %ecx
57; NOCMOV-NEXT:   movl  (%ecx), %edx
58; NOCMOV-NEXT:   retl
; Same ordered-equal select as the i32 variant, but on i64 operands.
; On x86-64 the expected output uses the 64-bit conditional moves; on i686 it
; picks the address of one stack argument and then loads the value as two
; 32-bit halves into %eax and %edx.
59define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
60entry:
61  %cmp = fcmp oeq float %a, %b            ; true only if ordered and equal
62  %r = select i1 %cmp, i64 %c, i64 %d     ; %c when %cmp holds, otherwise %d
63  ret i64 %r
64}
65
66; CHECK-LABEL: test_select_fcmp_une_i64:
67
68; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
69; CMOV-NEXT:   cmovneq  %rdi, %rsi
70; CMOV-NEXT:   cmovpq  %rdi, %rsi
71; CMOV-NEXT:   movq  %rsi, %rax
72; CMOV-NEXT:   retq
73
74; NOCMOV-NEXT:   flds  8(%esp)
75; NOCMOV-NEXT:   flds  4(%esp)
76; NOCMOV-NEXT:   fucompp
77; NOCMOV-NEXT:   fnstsw  %ax
78; NOCMOV-NEXT:   sahf
79; NOCMOV-NEXT:   leal  12(%esp), %ecx
80; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
81; NOCMOV-NEXT:   jp  [[TBB]]
82; NOCMOV-NEXT:   leal  20(%esp), %ecx
83; NOCMOV-NEXT: [[TBB]]:
84; NOCMOV-NEXT:   movl  (%ecx), %eax
85; NOCMOV-NEXT:   orl  $4, %ecx
86; NOCMOV-NEXT:   movl  (%ecx), %edx
87; NOCMOV-NEXT:   retl
; Selects between two i64 arguments on an unordered-or-not-equal compare, the
; logical inverse of the oeq case. The expected output above uses the same
; "ne" + "p" condition pair as the oeq variant, with the roles of the two
; value operands swapped.
88define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
89entry:
90  %cmp = fcmp une float %a, %b            ; true if unordered or not equal
91  %r = select i1 %cmp, i64 %c, i64 %d     ; %c when %cmp holds, otherwise %d
92  ret i64 %r
93}
94
95; CHECK-LABEL: test_select_fcmp_oeq_f64:
96
97; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
98; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
99; CMOV-NEXT:   jp  [[TBB]]
100; CMOV-NEXT:   movaps  %xmm2, %xmm3
101; CMOV-NEXT: [[TBB]]:
102; CMOV-NEXT:   movaps  %xmm3, %xmm0
103; CMOV-NEXT:   retq
104
105; NOCMOV-NEXT:   flds  8(%esp)
106; NOCMOV-NEXT:   flds  4(%esp)
107; NOCMOV-NEXT:   fucompp
108; NOCMOV-NEXT:   fnstsw  %ax
109; NOCMOV-NEXT:   sahf
110; NOCMOV-NEXT:   leal  20(%esp), %eax
111; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
112; NOCMOV-NEXT:   jp  [[TBB]]
113; NOCMOV-NEXT:   leal  12(%esp), %eax
114; NOCMOV-NEXT: [[TBB]]:
115; NOCMOV-NEXT:   fldl  (%eax)
116; NOCMOV-NEXT:   retl
; Ordered-equal select whose selected values are doubles. The expected x86-64
; output above uses a jne/jp branch pair around a movaps instead of
; conditional moves (presumably because the values live in xmm registers);
; the i686 output selects a stack address and loads the result with fldl.
117define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
118entry:
119  %cmp = fcmp oeq float %a, %b                  ; true only if ordered and equal
120  %r = select i1 %cmp, double %c, double %d     ; %c when %cmp holds, otherwise %d
121  ret double %r
122}
123
124; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
125
126; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
127; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
128; CMOV-NEXT:   jp  [[TBB]]
129; CMOV-NEXT:   movaps  %xmm2, %xmm3
130; CMOV-NEXT: [[TBB]]:
131; CMOV-NEXT:   movaps  %xmm3, %xmm0
132; CMOV-NEXT:   retq
133
134; NOCMOV-NEXT:   pushl  %edi
135; NOCMOV-NEXT:   pushl  %esi
136; NOCMOV-NEXT:   flds  20(%esp)
137; NOCMOV-NEXT:   flds  16(%esp)
138; NOCMOV-NEXT:   fucompp
139; NOCMOV-NEXT:   fnstsw  %ax
140; NOCMOV-NEXT:   sahf
141; NOCMOV-NEXT:   leal  40(%esp), %eax
142; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
143; NOCMOV-NEXT:   jp  [[TBB]]
144; NOCMOV-NEXT:   leal  24(%esp), %eax
145; NOCMOV-NEXT: [[TBB]]:
146; NOCMOV-NEXT:   movl  (%eax), %ecx
147; NOCMOV-NEXT:   leal  44(%esp), %edx
148; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
149; NOCMOV-NEXT:   jp  [[TBB]]
150; NOCMOV-NEXT:   leal  28(%esp), %edx
151; NOCMOV-NEXT: [[TBB]]:
152; NOCMOV-NEXT:   movl  12(%esp), %eax
153; NOCMOV-NEXT:   movl  (%edx), %edx
154; NOCMOV-NEXT:   leal  48(%esp), %esi
155; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
156; NOCMOV-NEXT:   jp  [[TBB]]
157; NOCMOV-NEXT:   leal  32(%esp), %esi
158; NOCMOV-NEXT: [[TBB]]:
159; NOCMOV-NEXT:   movl  (%esi), %esi
160; NOCMOV-NEXT:   leal  52(%esp), %edi
161; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
162; NOCMOV-NEXT:   jp  [[TBB]]
163; NOCMOV-NEXT:   leal  36(%esp), %edi
164; NOCMOV-NEXT: [[TBB]]:
165; NOCMOV-NEXT:   movl  (%edi), %edi
166; NOCMOV-NEXT:   movl  %edi, 12(%eax)
167; NOCMOV-NEXT:   movl  %esi, 8(%eax)
168; NOCMOV-NEXT:   movl  %edx, 4(%eax)
169; NOCMOV-NEXT:   movl  %ecx, (%eax)
170; NOCMOV-NEXT:   popl  %esi
171; NOCMOV-NEXT:   popl  %edi
172; NOCMOV-NEXT:   retl  $4
; Ordered-equal select whose selected values are <4 x i32> vectors, chosen as a
; whole by a single scalar i1 condition. The expected x86-64 output above is
; one jne/jp pair around a movaps; the expected i686 output repeats the jne/jp
; pair four times (once per 32-bit element), stores the four elements through
; the pointer loaded from 12(%esp), and returns with "retl $4".
173define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
174entry:
175  %cmp = fcmp oeq float %a, %b                        ; true only if ordered and equal
176  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d     ; whole-vector choice: %c or %d
177  ret <4 x i32> %r
178}
179
180; Also make sure we catch the original code-sequence of interest:
181
182; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
183; CMOV-NEXT:   .long  1065353216
184
185; CHECK-LABEL: test_zext_fcmp_une:
186; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
187; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
188; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
189; CMOV-NEXT:   jp  [[TBB]]
190; CMOV-NEXT:   xorps  %xmm0, %xmm0
191; CMOV-NEXT: [[TBB]]:
192; CMOV-NEXT:   retq
193
194; NOCMOV:        jne
195; NOCMOV-NEXT:   jp
; Turns the result of an unordered-or-not-equal compare into a float that is
; either 0.0 or 1.0. The expected x86-64 output above loads the constant-pool
; entry 1065353216 (the bit pattern 0x3F800000, i.e. 1.0f) and uses a jne/jp
; pair to skip the xorps that would zero the result.
196define float @test_zext_fcmp_une(float %a, float %b) #0 {
197entry:
198  %cmp = fcmp une float %a, %b             ; true if unordered or not equal
199  %conv1 = zext i1 %cmp to i32             ; 0 or 1
200  %conv2 = sitofp i32 %conv1 to float      ; 0.0 or 1.0
201  ret float %conv2
202}
203
204; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
205; CMOV-NEXT:   .long  1065353216
206
207; CHECK-LABEL: test_zext_fcmp_oeq:
208; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
209; CMOV-NEXT:   xorps  %xmm0, %xmm0
210; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
211; CMOV-NEXT:   jp  [[TBB]]
212; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
213; CMOV-NEXT: [[TBB]]:
214; CMOV-NEXT:   retq
215
216; NOCMOV:        jne
217; NOCMOV-NEXT:   jp
; Turns the result of an ordered-equal compare into a float that is either 0.0
; or 1.0. Compared with the une variant, the expected x86-64 output above
; swaps the two materializations: the result is zeroed with xorps first, and
; the jne/jp pair skips the load of the 1.0f constant-pool entry.
218define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
219entry:
220  %cmp = fcmp oeq float %a, %b             ; true only if ordered and equal
221  %conv1 = zext i1 %cmp to i32             ; 0 or 1
222  %conv2 = sitofp i32 %conv1 to float      ; 0.0 or 1.0
223  ret float %conv2
224}
225
; Attribute group referenced by the functions in this file that are marked
; with it; it contains only nounwind.
226attributes #0 = { nounwind }
227
; One-byte global, zero-initialized; it is the destination of the volatile
; store in no_cascade_opt.
228@g8 = global i8 0
229
230; The following test failed because llvm had a bug where a structure like:
231;
232; %vreg12<def> = CMOV_GR8 %vreg7, %vreg11 ... (lt)
233; %vreg13<def> = CMOV_GR8 %vreg12, %vreg11 ... (gt)
234;
235; was lowered incorrectly.
236;
237; The first two cmovs got expanded to:
238; BB#0:
239;   JL_1 BB#9
240; BB#7:
241;   JG_1 BB#9
242; BB#8:
243; BB#9:
244;   vreg12 = phi(vreg7, BB#8, vreg11, BB#0, vreg12, BB#7)
245;   vreg13 = COPY vreg12
246; Which was invalid as %vreg12 is not the same value as %vreg13
247
248; CHECK-LABEL: no_cascade_opt:
249; CMOV-DAG: cmpl %edx, %esi
250; CMOV-DAG: movb $20, %al
251; CMOV-DAG: movb $20, %dl
252; CMOV:   jl [[BB0:.LBB[0-9_]+]]
253; CMOV:   movl %ecx, %edx
254; CMOV: [[BB0]]:
255; CMOV:   jg [[BB1:.LBB[0-9_]+]]
256; CMOV:   movl %edx, %eax
257; CMOV: [[BB1]]:
258; CMOV:   testl %edi, %edi
259; CMOV:   je [[BB2:.LBB[0-9_]+]]
260; CMOV:   movl %edx, %eax
261; CMOV: [[BB2]]:
262; CMOV:   movb %al, g8(%rip)
263; CMOV:   retq
; Regression input for the cascaded-select bug described in the comment above.
; %sel0 and %sel1 form the chained pair: both produce 20 when their condition
; holds, and %sel1 falls back to %sel0. %sel2 then chooses between %sel1 and
; %sel0, so the two must remain distinct values; folding %sel1 into a copy of
; %sel0 (the invalid lowering) would change the stored byte.
; Only the x86-64 run has expectations for the body of this function.
; NOTE(review): unlike the other functions in this file, this one does not
; reference the nounwind attribute group.
264define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
265entry:
266  %c0 = icmp eq i32 %v0, 0                     ; picks %sel1 over %sel0 at the end
267  %c1 = icmp slt i32 %v1, %v2                  ; the "lt" condition
268  %c2 = icmp sgt i32 %v1, %v2                  ; the "gt" condition on the same operands
269  %trunc = trunc i32 %v3 to i8
270  %sel0 = select i1 %c1, i8 20, i8 %trunc      ; first select of the chain
271  %sel1 = select i1 %c2, i8 20, i8 %sel0       ; second select, consumes %sel0
272  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0    ; uses both, keeping them live and distinct
273  store volatile i8 %sel2, i8* @g8             ; volatile store of the selected byte
274  ret void
275}
276