; This tests the optimization where producers and consumers of i1 (bool)
; variables are combined to implicitly use flags instead of explicitly using
; stack or register variables.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s

; RUN: %if --need=target_ARM32 --command %p2i --filetype=obj \
; RUN:   --target arm32 -i %s --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32

; External helper called by the "intervening instructions" tests below.  It is
; only declared, never defined, in this file; the RUN lines pass
; -allow-externally-defined-symbols.
declare void @use_value(i32)

; Basic cmp/branch folding.
; %cmp1 has a single use: the conditional branch in the same block.  Both
; targets are expected to emit a compare followed by a conditional jump
; (jge on x86, bge on ARM32).
define internal i32 @fold_cmp_br(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch folding with intervening instructions.
; A call to @use_value sits between the icmp and the branch that consumes it.
; The x86 expectations require that no cmp appears before the call and that the
; cmp/jge pair appears after it; the ARM32 expectations likewise look for the
; cmp after the bl.
define internal i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
; ARM32: bl{{.*}}use_value
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch non-folding because of live-out.
; %cmp1 is produced in the entry block but consumed by the branch in the
; following block, so it is live across a block boundary.  The expectations
; look for the boolean being materialized (set.. on x86, movlt on ARM32) and
; then tested again before the conditional jump.
define internal i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br label %next
next:
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_liveout
; CHECK: cmp
; CHECK: set
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_liveout
; ARM32: cmp
; ARM32: movlt [[REG:r[0-9]+]]
; ARM32: tst [[REG]], #1
; ARM32: beq


; Cmp/branch non-folding because of extra non-whitelisted uses.
; Besides the branch, %cmp1 also feeds a zext whose result is returned from
; branch1.  The expectations look for the boolean being materialized
; (set../movzx on x86; mov #0 then movlt #1 on ARM32) and re-tested before the
; conditional jump.
define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = zext i1 %cmp1 to i32
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 %result
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_non_whitelist
; CHECK: cmp
; CHECK: set
; CHECK: movzx
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[R]], #1
; ARM32: tst [[R]], #1
; ARM32: beq
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Basic cmp/select folding.
; %cmp1 is used only by the select that follows it.  The expectations look for
; a compare followed by a conditional move (cmovl on x86, movlt on ARM32).
define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r0

; 64-bit cmp/select folding.
; The compare operates on the i32 truncations of the arguments, while the
; select chooses between the full i64 values.  The expectations look for one
; compare followed by two conditional moves (the ARM32 patterns capture them as
; LOW and HIGH and then expect them copied into r0 and r1).
define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %arg2_trunc = trunc i64 %arg2 to i32
  %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 %arg2
  ret i64 %result
}

; CHECK-LABEL: fold_cmp_select_64
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64
; ARM32: cmp r0, r2
; ARM32: movlt [[LOW:r[0-9]+]], r0
; ARM32: movlt [[HIGH:r[0-9]+]], r1
; ARM32: mov r0, [[LOW]]
; ARM32: mov r1, [[HIGH]]
; ARM32: bx lr


; 64-bit cmp/select folding with undef operands.
; The left-hand icmp operand and the false-side select operand are both undef.
; The x86 expectations are the same cmp/cmovl/cmovl shape as the non-undef
; 64-bit test; the ARM32 expectations look for rsbs (rather than cmp) followed
; by two movlt instructions.
define internal i64 @fold_cmp_select_64_undef(i64 %arg1) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %cmp1 = icmp slt i32 undef, %arg1_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 undef
  ret i64 %result
}
; CHECK-LABEL: fold_cmp_select_64_undef
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: movlt
; ARM32: movlt
; ARM32: bx lr


; Cmp/select folding with intervening instructions.
; A call to @use_value sits between the icmp and the select that consumes it.
; The x86 expectations require that no cmp appears before the call and that the
; cmp/cmovl pair appears after it; the ARM32 expectations look for cmp/movlt
; after the bl.
define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
; ARM32: bl{{.*}}use_value
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr

; Cmp/multi-select folding.
; %cmp1 feeds three selects, all in the same block as the icmp.  The x86
; expectations look for a separate cmp/cmov pair per select.  The ARM32
; expectations instead look for the boolean being set once (movlt ..., #1) and
; tested with tst before each conditional move.
define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_multi
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovge
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov
; ARM32: cmp
; ARM32: movlt {{.*}}, #1
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: tst {{.*}}, #1
; ARM32: movne {{.*}}, #123
; ARM32: bx lr


; Cmp/multi-select non-folding because of live-out.
; Two of the selects on %cmp1 are in the entry block, but the third is in the
; following block, so %cmp1 is live across a block boundary.  The x86
; expectations look for the boolean being materialized with set.. and each
; select re-testing it (cmp + cmovne/cmove); the ARM32 expectations look for
; movlt followed by a tst/movne sequence per select.
define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  br label %next
next:
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_liveout
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Cmp/branch non-folding due to load folding and intervening store.
; %arg1 is loaded from %addr, compared, and then the same address is stored to
; before the branch consumes %cmp1.  The expectations look for the boolean
; being materialized (cmp, set..) and compared again afterwards.  Only x86
; expectations are given for this test.
define internal i32 @no_fold_cmp_br_store(i32 %arg2, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg1 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  store i32 1, i32* %addr, align 1
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_store
; CHECK: cmp
; CHECK: set
; CHECK: cmp

; Cmp/select non-folding due to load folding and intervening store.
; %arg2 is loaded from %addr, compared, and then the same address is stored to
; before the select consumes %cmp1.  The expectations look for cmp/setl, then
; the store (mov DWORD PTR), then a second cmp feeding cmovne.  Only x86
; expectations are given for this test.
define internal i32 @no_fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  store i32 1, i32* %addr, align 1
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_store
; CHECK: cmp
; CHECK: setl
; CHECK: mov DWORD PTR
; CHECK: cmp
; CHECK: cmovne

; Cmp/select folding due to load folding and non-intervening store.
; Same shape as the intervening-store test, except the store to %addr comes
; after the select rather than between the icmp and the select.  The
; expectations look for a cmp with a memory operand (DWORD PTR) followed by
; cmovl.  Only x86 expectations are given for this test.
define internal i32 @fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  store i32 1, i32* %addr, align 1
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_store
; CHECK: cmp {{.*}},DWORD PTR
; CHECK: cmovl

; Cmp/multi-select non-folding because of extra non-whitelisted uses.
; In addition to three selects, %cmp1 also feeds a zext whose result is added
; into the return value.  The x86 expectations look for the boolean being
; materialized with set.., each select re-testing it (cmp + cmovne/cmove), and
; a movzx for the zext; the ARM32 expectations look for movlt followed by a
; tst/movne sequence per select.
define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1,
                                                            i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %ext = zext i1 %cmp1 to i32
  %partial1 = add i32 %a, %b
  %partial2 = add i32 %partial1, %c
  %result = add i32 %partial2, %ext
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: movzx
; CHECK: add
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Branch on the AND of two i1 values.
; Each i1 is a trunc of an i32 argument; the branch condition is %t0 & %t1.
; Only ARM32 expectations are given: a tst/beq pair on r0 followed by a
; tst/beq pair on r1.
define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = and i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding2_and
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: beq

; Branch on the OR of two i1 values.
; Each i1 is a trunc of an i32 argument; the branch condition is %t0 | %t1.
; Only ARM32 expectations are given: tst/bne on r0 followed by tst/beq on r1.
define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = or i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding2_or
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq

; Branch on a three-input boolean expression: (%t0 & %t1) | %t2.
; Each i1 is a trunc of an i32 argument.  Only ARM32 expectations are given:
; tst/beq on r0, tst/bne on r1, then tst/beq on r2.
define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = and i1 %t0, %t1
  %t4 = or i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_and_or
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: bne
; ARM32: tst r2, #1
; ARM32: beq

; Branch on a three-input boolean expression: (%t0 | %t1) & %t2.
; Each i1 is a trunc of an i32 argument.  Only ARM32 expectations are given:
; tst/bne on r0, tst/beq on r1, then tst/beq on r2.
define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = or i1 %t0, %t1
  %t4 = and i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_or_and
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: beq

; Branch on a five-input boolean expression:
;   ((%t0 | %t1) & %t2) | (%t3 & %t4)
; Each i1 is a trunc of an i32 argument.  Only ARM32 expectations are given: a
; tst plus conditional branch per input, in argument order (r0..r4).  The last
; two beq patterns must name the same branch target (captured as TARGET).
define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
                                    i32 %arg5) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1
  %t3 = trunc i32 %arg4 to i1
  %t4 = trunc i32 %arg5 to i1

  %t5 = or i1 %t0, %t1
  %t6 = and i1 %t5, %t2
  %t7 = and i1 %t3, %t4
  %t8 = or i1 %t6, %t7
  br i1 %t8, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding4
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: bne
; ARM32: tst     r3, #1
; ARM32: beq     [[TARGET:.*]]
; ARM32: tst     r4, #1
; ARM32: beq     [[TARGET]]
