; RUN: opt -codegenprepare -mtriple=arm64-apple-ios -S -o - %s | FileCheck --check-prefix=OPT %s
; RUN: llc < %s -march=arm64 | FileCheck %s

; Bitfield extract / insert selection tests for AArch64.
; The llc-side checks expect shift+mask and shift+shift idioms to be selected
; as ubfx / sbfx / bfxil instead of separate shift and "and" instructions.
; The opt-side checks look at the IR left behind by codegenprepare for the
; functions that have control flow (fct19 and fct20).

%struct.X = type { i8, i8, [2 x i8] }
%struct.Y = type { i32, i8 }
%struct.Z = type { i8, i8, [2 x i8], i16 }
%struct.A = type { i64, i8 }

; Bit 3 of a loaded i32 is extracted (lshr 3, and 1) and stored as an i8:
; expect a ubfx and no separate "and" before the return.
define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp {
; CHECK-LABEL: foo:
; CHECK: ubfx
; CHECK-NOT: and
; CHECK: ret

  %tmp = bitcast %struct.X* %x to i32*
  %tmp1 = load i32, i32* %tmp, align 4
  %b = getelementptr inbounds %struct.Y, %struct.Y* %y, i64 0, i32 1
  %bf.clear = lshr i32 %tmp1, 3
  %bf.clear.lobit = and i32 %bf.clear, 1
  %frombool = trunc i32 %bf.clear.lobit to i8
  store i8 %frombool, i8* %b, align 1
  ret void
}

; shl 28 followed by ashr 28 on i32 sign-extends the low 4 bits:
; expect sbfx #0, #4.
define i32 @baz(i64 %cav1.coerce) nounwind {
; CHECK-LABEL: baz:
; CHECK: sbfx w0, w0, #0, #4
  %tmp = trunc i64 %cav1.coerce to i32
  %tmp1 = shl i32 %tmp, 28
  %bf.val.sext = ashr exact i32 %tmp1, 28
  ret i32 %bf.val.sext
}

; shl 22 followed by ashr 26 on i32 sign-extends a 6-bit field that starts at
; bit 4: expect sbfx #4, #6.
define i32 @bar(i64 %cav1.coerce) nounwind {
; CHECK-LABEL: bar:
; CHECK: sbfx w0, w0, #4, #6
  %tmp = trunc i64 %cav1.coerce to i32
  %cav1.sroa.0.1.insert = shl i32 %tmp, 22
  %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
  ret i32 %tmp1
}

; i64 counterpart of @foo: bit 3 of a loaded i64 is extracted and stored.
; Expect a 64-bit ubfx and no separate "and" before the return.
define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
; CHECK-LABEL: fct1:
; CHECK: ubfx x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOT: and
; CHECK: ret

  %tmp = bitcast %struct.Z* %x to i64*
  %tmp1 = load i64, i64* %tmp, align 4
  %b = getelementptr inbounds %struct.A, %struct.A* %y, i64 0, i32 0
  %bf.clear = lshr i64 %tmp1, 3
  %bf.clear.lobit = and i64 %bf.clear, 1
  store i64 %bf.clear.lobit, i64* %b, align 8
  ret void
}

; i64 counterpart of @baz: shl 28 / ashr 28 sign-extends the low 36 bits,
; expect sbfx #0, #36.
define i64 @fct2(i64 %cav1.coerce) nounwind {
; CHECK-LABEL: fct2:
; CHECK: sbfx x0, x0, #0, #36
  %tmp = shl i64 %cav1.coerce, 28
  %bf.val.sext = ashr exact i64 %tmp, 28
  ret i64 %bf.val.sext
}

; i64 counterpart of @bar: shl 22 / ashr 26 sign-extends a 38-bit field that
; starts at bit 4, expect sbfx #4, #38.
define i64 @fct3(i64 %cav1.coerce) nounwind {
; CHECK-LABEL: fct3:
; CHECK: sbfx x0, x0, #4, #38
  %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
  %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
  ret i64 %tmp1
}

; The low 24 bits of *y are replaced by bits [39:16] of x (the -16777216 mask
; clears the low 24 bits, 16777215 keeps 24 bits): expect bfxil #16, #24.
define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
; CHECK-NEXT: str [[REG1]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -16777216
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 16777215
  %or = or i64 %and, %and1
  store i64 %or, i64* %y, align 8
  ret void
}

; The low 3 bits of *y are replaced by bits [18:16] of x: expect
; bfxil #16, #3.
define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
; CHECK-NEXT: str [[REG1]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  store i32 %or, i32* %y, align 8
  ret void
}

; Check if we can still catch bfm instruction when we drop some low bits
define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
; lsr is an alias of ubfm
; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %shr1 = lshr i32 %or, 2
  store i32 %shr1, i32* %y, align 8
  ret void
}


; Check if we can still catch bfm instruction when we drop some high bits
define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %shl = shl i32 %or, 2
  store i32 %shl, i32* %y, align 8
  ret void
}


; Check if we can still catch bfm instruction when we drop some low bits
; (i64 version)
define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
; lsr is an alias of ubfm
; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -8
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 7
  %or = or i64 %and, %and1
  %shr1 = lshr i64 %or, 2
  store i64 %shr1, i64* %y, align 8
  ret void
}


; Check if we can still catch bfm instruction when we drop some high bits
; (i64 version)
define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -8
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 7
  %or = or i64 %and, %and1
  %shl = shl i64 %or, 2
  store i64 %shl, i64* %y, align 8
  ret void
}

; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
; (i32 version)
define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #0, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -8
  %and1 = and i32 %x, 7
  %or = or i32 %and, %and1
  %shl = shl i32 %or, 2
  store i32 %shl, i32* %y, align 8
  ret void
}

; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
; (i64 version)
define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #0, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -8
  %and1 = and i64 %x, 7
  %or = or i64 %and, %and1
  %shl = shl i64 %or, 2
  store i64 %shl, i64* %y, align 8
  ret void
}

; Testing a single bit against zero (mask 2048 == 1 << 11): expect
; ubfx #11, #1 with no "and" emitted before it.
define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 {
; CHECK-LABEL: fct12bis:
; CHECK-NOT: and
; CHECK: ubfx w0, w0, #11, #1
  %and.i.i = and i32 %tmp2, 2048
  %tobool.i.i = icmp ne i32 %and.i.i, 0
  ret i1 %tobool.i.i
}

; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits
define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
; ubfx is an alias of ubfm
; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %shl = shl i32 %or, 2
  %shr2 = lshr i32 %shl, 4
  store i32 %shr2, i32* %y, align 8
  ret void
}

; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits
; (i64 version)
define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
; ubfx is an alias of ubfm
; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -8
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 7
  %or = or i64 %and, %and1
  %shl = shl i64 %or, 2
  %shr2 = lshr i64 %shl, 4
  store i64 %shr2, i64* %y, align 8
  ret void
}


; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits
define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #8
; lsr is an alias of ubfm
; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
; CHECK-NEXT: bfxil [[REG2]], w2, #5, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, -256
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 255
  %or = or i32 %and, %and1
  ; Despite its name, %shl is a logical shift right.
  %shl = lshr i32 %or, 4
  %and2 = and i32 %shl, -8
  %shr1 = lshr i32 %x1, 5
  %and3 = and i32 %shr1, 7
  %or1 = or i32 %and2, %and3
  %shl1 = shl i32 %or1, 2
  store i32 %shl1, i32* %y, align 8
  ret void
}

; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits
; (i64 version)
define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldr [[REG1:x[0-9]+]],
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #8
; lsr is an alias of ubfm
; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
; CHECK-NEXT: bfxil [[REG2]], x2, #5, #3
; lsl is an alias of ubfm
; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, -256
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 255
  %or = or i64 %and, %and1
  ; Despite its name, %shl is a logical shift right.
  %shl = lshr i64 %or, 4
  %and2 = and i64 %shl, -8
  %shr1 = lshr i64 %x1, 5
  %and3 = and i64 %shr1, 7
  %or1 = or i64 %and2, %and3
  %shl1 = shl i64 %or1, 2
  store i64 %shl1, i64* %y, align 8
  ret void
}

; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits and a masking operation has to be kept
define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct16:
; CHECK: ldr [[REG1:w[0-9]+]],
; Create the constant (1737056 == 1703936 + 33120, i.e. 0x1A0000 + 0x8160)
; CHECK: mov [[REGCST:w[0-9]+]], #1703936
; CHECK: movk [[REGCST]], #33120
; Do the masking
; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
; CHECK-NEXT: bfxil [[REG2]], w1, #16, #3
; ubfx is an alias of ubfm
; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %y, align 8
  %and = and i32 %0, 1737056
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %shl = shl i32 %or, 2
  %shr2 = lshr i32 %shl, 4
  store i32 %shr2, i32* %y, align 8
  ret void
}


; Check if we can still catch bfm instruction when we drop some high bits
; and some low bits and a masking operation has to be kept
; (i64 version)
define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
entry:
; CHECK-LABEL: fct17:
; CHECK: ldr [[REG1:x[0-9]+]],
; Create the constant (1737056 == 1703936 + 33120, i.e. 0x1A0000 + 0x8160)
; CHECK: mov w[[REGCST:[0-9]+]], #1703936
; CHECK: movk w[[REGCST]], #33120
; Do the masking
; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]]
; CHECK-NEXT: bfxil [[REG2]], x1, #16, #3
; ubfx is an alias of ubfm
; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
  %0 = load i64, i64* %y, align 8
  %and = and i64 %0, 1737056
  %shr = lshr i64 %x, 16
  %and1 = and i64 %shr, 7
  %or = or i64 %and, %and1
  %shl = shl i64 %or, 2
  %shr2 = lshr i64 %shl, 4
  store i64 %shr2, i64* %y, align 8
  ret void
}

; lshr by 9 on i32, zero-extended to i64 and then masked with 255: expect a
; single 64-bit ubfx #9, #8.
define i64 @fct18(i32 %xor72) nounwind ssp {
; CHECK-LABEL: fct18:
; CHECK: ubfx x0, x0, #9, #8
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %result = and i64 %conv82, 255
  ret i64 %result
}

; Using the access to the global array to keep the instruction and control flow.
@first_ones = external global [65536 x i8]

; The shifts are all computed in the entry block while their trunc / "and"
; users live in other blocks. The opt-side checks expect an lshr to be present
; in each user block after codegenprepare; the llc-side checks expect ubfx for
; the 16-bit fields at bits 32 and 16.
; Function Attrs: nounwind readonly ssp
define i32 @fct19(i64 %arg1) nounwind readonly ssp {
; CHECK-LABEL: fct19:
entry:
  %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16
  %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16
  %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32
  %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48
  %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
  %0 = load i8, i8* %arrayidx3, align 1
  %conv = zext i8 %0 to i32
  br label %return

; OPT-LABEL: if.end
if.end:                                           ; preds = %entry
; OPT: lshr
; CHECK: ubfx [[REG1:x[0-9]+]], [[REG2:x[0-9]+]], #32, #16
  %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16
  %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0
; CHECK: cbz
  br i1 %tobool6, label %if.end13, label %if.then7

; OPT-LABEL: if.then7
if.then7:                                         ; preds = %if.end
; OPT: lshr
; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
; So neither of them should be in the assembly code.
; CHECK-NOT: and
; CHECK-NOT: ubfm
  %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
  %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
  %1 = load i8, i8* %arrayidx11, align 1
  %conv12 = zext i8 %1 to i32
  %add = add nsw i32 %conv12, 16
  br label %return

; OPT-LABEL: if.end13
if.end13:                                         ; preds = %if.end
; OPT: lshr
; OPT: trunc
; CHECK: ubfx [[REG3:x[0-9]+]], [[REG4:x[0-9]+]], #16, #16
  %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0
; CHECK: cbz
  br i1 %tobool16, label %return, label %if.then17

; OPT-LABEL: if.then17
if.then17:                                        ; preds = %if.end13
; OPT: lshr
; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
; So neither of them should be in the assembly code.
; CHECK-NOT: and
; CHECK-NOT: ubfm
  %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
  %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
  %2 = load i8, i8* %arrayidx21, align 1
  %conv22 = zext i8 %2 to i32
  %add23 = add nsw i32 %conv22, 32
  br label %return

return:                                           ; preds = %if.end13, %if.then17, %if.then7, %if.then
; CHECK: ret
  %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ]
  ret i32 %retval.0
}

; Make sure we do not assert if the immediate of the "and" is bigger than i64.
; PR19503.
; OPT-LABEL: @fct20
; OPT: lshr
; OPT-NOT: lshr
; OPT: ret
; CHECK-LABEL: fct20:
; CHECK: ret
define i80 @fct20(i128 %a, i128 %b) {
entry:
  ; The i128 shift has two users: the trunc here and the wide "and" in %mask.
  %a.hi = lshr i128 %a, 18
  %narrow = trunc i128 %a.hi to i80
  %b.is.zero = icmp eq i128 %b, 0
  br i1 %b.is.zero, label %mask, label %done

mask:
  ; The mask constant does not fit in 64 bits.
  %masked = and i128 %a.hi, 483673642326615442599424
  %masked.narrow = trunc i128 %masked to i80
  br label %done

done:
  %result = phi i80 [ %narrow, %entry ], [ %masked.narrow, %mask ]
  ret i80 %result
}

; UBFX should still be selected when the result of the "and" is consumed by a
; shift-left (presumably the index scaling of the array access below).
; CHECK-LABEL: fct21:
; CHECK: ubfx
@arr = external global [8 x [64 x i64]]
define i64 @fct21(i64 %x) {
entry:
  %shifted = lshr i64 %x, 4
  %nibble = and i64 %shifted, 15
  %slot = getelementptr inbounds [8 x [64 x i64]], [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %nibble
  %elt = load i64, i64* %slot, align 8
  ret i64 %elt
}

; Two chained insertions. First a 4-bit field from %in is placed at bits [6:3]
; on top of the low 3 bits of %dst. The 7-bit result is then also copied to
; bit 8, and only the low 16 bits are returned.
define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
; CHECK-LABEL: test_ignored_rightbits:

  ; First insertion: (%dst & 7) | ((%in << 3) & 120).
  %field.shifted = shl i32 %in, 3
  %field.bits = and i32 %field.shifted, 120
  %dst.low = and i32 %dst, 7
  %merged = or i32 %dst.low, %field.bits
; CHECK: {{bfm|bfi|bfxil}}

  ; Second insertion: (%merged << 8) | %merged, truncated to i16.
  %merged.hi = shl i32 %merged, 8
  %dup = or i32 %merged.hi, %merged
  %res16 = trunc i32 %dup to i16
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #7

  ret i16 %res16
}