; This tests the optimization where producers and consumers of i1 (bool)
; variables are combined to implicitly use flags instead of explicitly using
; stack or register variables.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN: -allow-externally-defined-symbols | FileCheck %s

; RUN: %if --need=target_ARM32 --command %p2i --filetype=obj \
; RUN: --target arm32 -i %s --disassemble --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --command FileCheck %s \
; RUN: --check-prefix=ARM32

; External function. The tests call it to place an unrelated instruction
; (a call) between a compare and the consumer of the compare's result.
declare void @use_value(i32)

; Basic cmp/branch folding.
; The icmp result has exactly one use: the conditional branch that directly
; follows it.
define internal i32 @fold_cmp_br(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; Expected: a compare followed by a conditional branch on the inverted
; condition (jge for the default target, bge for ARM32).
; CHECK-LABEL: fold_cmp_br
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch folding with intervening instructions.
; A call sits between the icmp and the branch that consumes it.
define internal i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; Expected: no compare before the call; the compare appears after the call,
; followed by the conditional branch.
; CHECK-LABEL: fold_cmp_br_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
; ARM32: bl{{.*}}use_value
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Cmp/branch non-folding because of live-out.
define internal i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) {
entry:
  ; The compare is produced in this block ...
  %cmp1 = icmp slt i32 %arg1, %arg2
  br label %next
next:
  ; ... but consumed in a different block, so %cmp1 is live out of entry.
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; Expected: the bool is materialized (set* / movlt) and then re-tested
; (cmp + je / tst + beq) instead of branching directly on the first compare.
; CHECK-LABEL: no_fold_cmp_br_liveout
; CHECK: cmp
; CHECK: set
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_liveout
; ARM32: cmp
; ARM32: movlt [[REG:r[0-9]+]]
; ARM32: tst [[REG]], #1
; ARM32: beq


; Cmp/branch non-folding because of extra non-whitelisted uses.
; Besides the branch, %cmp1 is also consumed by a zext.
define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = zext i1 %cmp1 to i32
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 %result
branch2:
  ret i32 2
}

; Expected: the bool is materialized, extended, and then re-tested for the
; branch.
; CHECK-LABEL: no_fold_cmp_br_non_whitelist
; CHECK: cmp
; CHECK: set
; CHECK: movzx
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[R]], #1
; ARM32: tst [[R]], #1
; ARM32: beq
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr


; Basic cmp/select folding.
; The icmp result has exactly one use: the select that directly follows it.
define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; Expected: a compare followed by a conditional move (cmovl / movlt).
; CHECK-LABEL: fold_cmp_select
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r0

; 64-bit cmp/select folding.
define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) {
entry:
  ; The compare is done on the truncated 32-bit values; the select picks
  ; between the full 64-bit arguments.
  %arg1_trunc = trunc i64 %arg1 to i32
  %arg2_trunc = trunc i64 %arg2 to i32
  %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 %arg2
  ret i64 %result
}

; Expected: one compare and two conditional moves (low and high halves of
; the i64 result).
; CHECK-LABEL: fold_cmp_select_64
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64
; ARM32: cmp r0, r2
; ARM32: movlt [[LOW:r[0-9]+]], r0
; ARM32: movlt [[HIGH:r[0-9]+]], r1
; ARM32: mov r0, [[LOW]]
; ARM32: mov r1, [[HIGH]]
; ARM32: bx lr


; 64-bit cmp/select folding where one compare operand and one select operand
; are undef.
define internal i64 @fold_cmp_select_64_undef(i64 %arg1) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %cmp1 = icmp slt i32 undef, %arg1_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 undef
  ret i64 %result
}
; Expected: still one flag-setting instruction followed by two conditional
; moves.
; CHECK-LABEL: fold_cmp_select_64_undef
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: movlt
; ARM32: movlt
; ARM32: bx lr


; Cmp/select folding with intervening instructions.
; A call sits between the icmp and the select that consumes it.
define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  call void @use_value(i32 %arg1)
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; Expected: no compare before the call; the compare appears after the call,
; followed by the conditional move.
; CHECK-LABEL: fold_cmp_select_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
; ARM32: bl{{.*}}use_value
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr

; Cmp/multi-select folding.
define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) {
entry:
  ; One compare feeds three selects, all in the same block.
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; Expected for the default target: a cmp/cmov pair per select, with no set*
; instruction. The ARM32 expectations differ: the bool is materialized
; (movlt ..., #1) and re-tested with tst before each movne.
; CHECK-LABEL: fold_cmp_select_multi
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovge
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov
; ARM32: cmp
; ARM32: movlt {{.*}}, #1
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: tst {{.*}}, #1
; ARM32: movne {{.*}}, #123
; ARM32: bx lr


; Cmp/multi-select non-folding because of live-out.
define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  br label %next
next:
  ; The third select uses %cmp1 from the previous block, so %cmp1 is live out
  ; of entry.
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; Expected: the bool is materialized with set* and each select re-tests it
; (cmp + cmovne / cmove).
; CHECK-LABEL: no_fold_cmp_select_multi_liveout
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Cmp/branch non-folding due to load folding and intervening store.
define internal i32 @no_fold_cmp_br_store(i32 %arg2, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  ; The loaded value is a compare operand ...
  %arg1 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; ... and a store to the same address sits between the compare and the
  ; branch that consumes it.
  store i32 1, i32* %addr, align 1
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; Expected: the bool is materialized (set*) and then re-tested.
; Only default-prefix expectations are listed for this function.
; CHECK-LABEL: no_fold_cmp_br_store
; CHECK: cmp
; CHECK: set
; CHECK: cmp

; Cmp/select non-folding due to load folding and intervening store.
define internal i32 @no_fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The store sits between the compare and the select that consumes it.
  store i32 1, i32* %addr, align 1
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ret i32 %result
}

; Expected: compare, materialize with setl, perform the store, then re-test
; the bool for the conditional move.
; Only default-prefix expectations are listed for this function.
; CHECK-LABEL: no_fold_cmp_select_store
; CHECK: cmp
; CHECK: setl
; CHECK: mov DWORD PTR
; CHECK: cmp
; CHECK: cmovne

; Cmp/select folding due to load folding and non-intervening store.
define internal i32 @fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ; Here the store comes after the select, not between compare and select.
  store i32 1, i32* %addr, align 1
  ret i32 %result
}

; Expected: a compare with a memory operand followed directly by cmovl.
; Only default-prefix expectations are listed for this function.
; CHECK-LABEL: fold_cmp_select_store
; CHECK: cmp {{.*}},DWORD PTR
; CHECK: cmovl

; Cmp/multi-select non-folding because of extra non-whitelisted uses.
define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1,
                                                            i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  ; Besides the three selects, %cmp1 is also consumed by a zext.
  %ext = zext i1 %cmp1 to i32
  %partial1 = add i32 %a, %b
  %partial2 = add i32 %partial1, %c
  %result = add i32 %partial2, %ext
  ret i32 %result
}

; Expected: the bool is materialized with set*, re-tested for each select,
; and extended with movzx.
; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: movzx
; CHECK: add
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Branch on (t0 and t1), where both i1 values are truncated from i32 args.
; Only ARM32 expectations are listed for this and the following
; br_i1_folding* functions.
define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = and i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; Expected: each operand is tested with tst and followed by beq.
; ARM32-LABEL: br_i1_folding2_and
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: beq

; Branch on (t0 or t1).
define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = or i1 %t0, %t1
  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; Expected: the first operand is followed by bne, the second by beq.
; ARM32-LABEL: br_i1_folding2_or
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq

; Branch on ((t0 and t1) or t2).
define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = and i1 %t0, %t1
  %t4 = or i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_and_or
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: bne
; ARM32: tst r2, #1
; ARM32: beq

; Branch on ((t0 or t1) and t2).
define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = or i1 %t0, %t1
  %t4 = and i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; ARM32-LABEL: br_i1_folding3_or_and
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: beq

; Branch on (((t0 or t1) and t2) or (t3 and t4)), using five i1 inputs.
define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
                                    i32 %arg5) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1
  %t3 = trunc i32 %arg4 to i1
  %t4 = trunc i32 %arg5 to i1

  %t5 = or i1 %t0, %t1
  %t6 = and i1 %t5, %t2
  %t7 = and i1 %t3, %t4
  %t8 = or i1 %t6, %t7
  br i1 %t8, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}
; The last two beq instructions must branch to the same target.
; ARM32-LABEL: br_i1_folding4
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: bne
; ARM32: tst r3, #1
; ARM32: beq [[TARGET:.*]]
; ARM32: tst r4, #1
; ARM32: beq [[TARGET]]