; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=non-leaf | \
; RUN: llvm-objdump --private-headers - | \
; RUN: FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT
; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -fast-isel -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST

; CHECK-MACHO: Mach header
; CHECK-MACHO: MH_MAGIC ARM64_32 V8

@var64 = global i64 zeroinitializer, align 8
@var32 = global i32 zeroinitializer, align 4

@var_got = external global i8

define i32* @test_global_addr() {
; CHECK-LABEL: test_global_addr:
; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE
; CHECK-OPT: add x0, [[PAGE]], _var32@PAGEOFF
; CHECK-FAST: add [[TMP:x[0-9]+]], [[PAGE]], _var32@PAGEOFF
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  ret i32* @var32
}

; ADRP is necessarily 64-bit. The important point to check is that, however it
; gets truncated to 32 bits, the truncation is free. There is no need to zero
; out the higher bits of that register.
define i64 @test_global_addr_extension() {
; CHECK-LABEL: test_global_addr_extension:
; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE
; CHECK: add x0, [[PAGE]], _var32@PAGEOFF
; CHECK-NOT: and
; CHECK: ret

  ret i64 ptrtoint(i32* @var32 to i64)
}

define i32 @test_global_value() {
; CHECK-LABEL: test_global_value:
; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE
; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF]
  %val = load i32, i32* @var32, align 4
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here.
define i32 @test_unsafe_indexed_add() {
; CHECK-LABEL: test_unsafe_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_32 = add i32 %addr_int, 32
  %addr = inttoptr i32 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

; Since we've promised there is no unsigned overflow, @var32 must be at least
; 32 bytes below 2^32, and we can use the load this time.
define i32 @test_safe_indexed_add() {
; CHECK-LABEL: test_safe_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: ldr w0, [x[[VAR32]], #32]
  %addr_int = ptrtoint i32* @var32 to i64
  %addr_plus_32 = add nuw i64 %addr_int, 32
  %addr = inttoptr i64 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

define i32 @test_safe_indexed_or(i32 %in) {
; CHECK-LABEL: test_safe_indexed_or:
; CHECK: and [[TMP:w[0-9]+]], {{w[0-9]+}}, #0xfffffff0
; CHECK: orr w[[ADDR:[0-9]+]], [[TMP]], #0x4
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = and i32 %in, -16
  %addr_plus_4 = or i32 %addr_int, 4
  %addr = inttoptr i32 %addr_plus_4 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

; Promising nsw is not sufficient because the addressing mode basically
; calculates "zext(base) + zext(offset)" and nsw only guarantees
; "sext(base) + sext(offset) == base + offset".
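; To make that concrete (illustrative values only, not checked by this test):
; take base = 0xffffffe0, which is -32 as a signed i32, and offset = 32. The
; nsw flag holds because -32 + 32 = 0 does not overflow signedly, yet
; zext(base) + zext(offset) = 0x100000000 while the correct truncated 32-bit
; address is 0, so folding the offset into a 64-bit addressing mode would
; compute the wrong address.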
define i32 @test_unsafe_nsw_indexed_add() {
; CHECK-LABEL: test_unsafe_nsw_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
; CHECK-NOT: ubfx
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_32 = add nsw i32 %addr_int, 32
  %addr = inttoptr i32 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldur w0, [xN, #3]"
; here.
define i32 @test_unsafe_unscaled_add() {
; CHECK-LABEL: test_unsafe_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Since we've promised there is no unsigned overflow, @var32 must be at least
; 3 bytes below 2^32, and we can use the load this time.
define i32 @test_safe_unscaled_add() {
; CHECK-LABEL: test_safe_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: ldur w0, [x[[VAR32]], #3]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add nuw i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Promising nsw is not sufficient because the addressing mode basically
; calculates "zext(base) + zext(offset)" and nsw only guarantees
; "sext(base) + sext(offset) == base + offset".
define i32 @test_unsafe_nsw_unscaled_add() {
; CHECK-LABEL: test_unsafe_nsw_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK-NOT: ubfx
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add nsw i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldur w0, [xN, #-3]"
; here.
define i32 @test_unsafe_negative_unscaled_add() {
; CHECK-LABEL: test_unsafe_negative_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: sub w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_minus_3 = add i32 %addr_int, -3
  %addr = inttoptr i32 %addr_minus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

define i8* @test_got_addr() {
; CHECK-LABEL: test_got_addr:
; CHECK: adrp x[[PAGE:[0-9]+]], _var_got@GOTPAGE
; CHECK-OPT: ldr w0, [x[[PAGE]], _var_got@GOTPAGEOFF]
; CHECK-FAST: ldr w[[TMP:[0-9]+]], [x[[PAGE]], _var_got@GOTPAGEOFF]
; CHECK-FAST: and x0, x[[TMP]], #0xffffffff
  ret i8* @var_got
}

define float @test_va_arg_f32(i8** %list) {
; CHECK-LABEL: test_va_arg_f32:

; CHECK: ldr w[[START:[0-9]+]], [x0]
; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #8
; CHECK: str [[AFTER]], [x0]

  ; Floating-point arguments get promoted to double as per C99.
; CHECK: ldr [[DBL:d[0-9]+]], [x[[START]]]
; CHECK: fcvt s0, [[DBL]]
  %res = va_arg i8** %list, float
  ret float %res
}

; The interesting point is that the slot is 4 bytes.
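; (A note on the layout this relies on, assuming the armv7k-compatible varargs
; convention: on arm64_32 the va_list is a single 32-bit pointer and each slot
; is a multiple of 4 bytes rather than the 8-byte slots of 64-bit Darwin, so
; the list pointer advances by only 4 for an int-sized argument.)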
define i8 @test_va_arg_i8(i8** %list) {
; CHECK-LABEL: test_va_arg_i8:

; CHECK: ldr w[[START:[0-9]+]], [x0]
; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #4
; CHECK: str [[AFTER]], [x0]

  ; i8 gets promoted to int (again, as per C99).
; CHECK: ldr w0, [x[[START]]]

  %res = va_arg i8** %list, i8
  ret i8 %res
}

; The interesting point is that the slot needs aligning (again, the minimum
; slot size is 4 bytes).
define i64 @test_va_arg_i64(i64** %list) {
; CHECK-LABEL: test_va_arg_i64:

  ; Update the list for the next user (minimum slot size is 4, but the actual
  ; argument is 8, which had better be reflected!)
; CHECK: ldr w[[UNALIGNED_START:[0-9]+]], [x0]
; CHECK: add [[ALIGN_TMP:x[0-9]+]], x[[UNALIGNED_START]], #7
; CHECK: and x[[START:[0-9]+]], [[ALIGN_TMP]], #0x1fffffff8
; CHECK: add w[[AFTER:[0-9]+]], w[[START]], #8
; CHECK: str w[[AFTER]], [x0]

; CHECK: ldr x0, [x[[START]]]

  %res = va_arg i64** %list, i64
  ret i64 %res
}

declare void @bar(...)
define void @test_va_call(i8 %l, i8 %r, float %in, i8* %ptr) {
; CHECK-LABEL: test_va_call:
; CHECK: add [[SUM:w[0-9]+]], {{w[0-9]+}}, w1

; CHECK-DAG: str w2, [sp, #32]
; CHECK-DAG: str xzr, [sp, #24]
; CHECK-DAG: str s0, [sp, #16]
; CHECK-DAG: str xzr, [sp, #8]
; CHECK-DAG: str [[SUM]], [sp]

  ; Add the arguments to ensure real promotion occurs.
  %sum = add i8 %l, %r
  call void(...) @bar(i8 %sum, i64 0, float %in, double 0.0, i8* %ptr)
  ret void
}

declare i8* @llvm.frameaddress(i32)

define i8* @test_frameaddr() {
; CHECK-LABEL: test_frameaddr:
; CHECK-OPT: ldr x0, [x29]
; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x29]
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  %val = call i8* @llvm.frameaddress(i32 1)
  ret i8* %val
}

declare i8* @llvm.returnaddress(i32)

define i8* @test_toplevel_returnaddr() {
; CHECK-LABEL: test_toplevel_returnaddr:
; CHECK-OPT: mov x0, x30
; CHECK-FAST: and x0, x30, #0xffffffff
  %val = call i8* @llvm.returnaddress(i32 0)
  ret i8* %val
}

define i8* @test_deep_returnaddr() {
; CHECK-LABEL: test_deep_returnaddr:
; CHECK: ldr x[[FRAME_REC:[0-9]+]], [x29]
; CHECK-OPT: ldr x30, [x[[FRAME_REC]], #8]
; CHECK-OPT: hint #7
; CHECK-OPT: mov x0, x30
; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x[[FRAME_REC]], #8]
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  %val = call i8* @llvm.returnaddress(i32 1)
  ret i8* %val
}

define void @test_indirect_call(void()* %func) {
; CHECK-LABEL: test_indirect_call:
; CHECK: blr x0
  call void() %func()
  ret void
}

; Safe to use the unextended address here.
define void @test_indirect_safe_call(i32* %weird_funcs) {
; CHECK-LABEL: test_indirect_safe_call:
; CHECK: add w[[ADDR32:[0-9]+]], w0, #4
; CHECK-OPT-NOT: ubfx
; CHECK: blr x[[ADDR32]]
  %addr = getelementptr i32, i32* %weird_funcs, i32 1
  %func = bitcast i32* %addr to void()*
  call void() %func()
  ret void
}

declare void @simple()
define void @test_simple_tail_call() {
; CHECK-LABEL: test_simple_tail_call:
; CHECK: b _simple
  tail call void @simple()
  ret void
}

define void @test_indirect_tail_call(void()* %func) {
; CHECK-LABEL: test_indirect_tail_call:
; CHECK: br x0
  tail call void() %func()
  ret void
}

; Safe to use the unextended address here.
define void @test_indirect_safe_tail_call(i32* %weird_funcs) {
; CHECK-LABEL: test_indirect_safe_tail_call:
; CHECK: add w[[ADDR32:[0-9]+]], w0, #4
; CHECK-OPT-NOT: ubfx
; CHECK-OPT: br x[[ADDR32]]
  %addr = getelementptr i32, i32* %weird_funcs, i32 1
  %func = bitcast i32* %addr to void()*
  tail call void() %func()
  ret void
}

; For the "armv7k" slice, Clang emits some small structs as [N x i32]. For ABI
; compatibility with arm64_32 these need to be passed in *X* registers
; (e.g. [2 x i32] would be packed into a single register).

define i32 @test_in_smallstruct_low([3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_low:
; CHECK: mov x0, x1
  %val = extractvalue [3 x i32] %in, 2
  ret i32 %val
}

define i32 @test_in_smallstruct_high([3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_high:
; CHECK: lsr x0, x0, #32
  %val = extractvalue [3 x i32] %in, 1
  ret i32 %val
}

; The 64-bit DarwinPCS ABI has the quirk that structs on the stack are always
; 64-bit aligned. This must not happen for arm64_32 since otherwise va_arg will
; be incompatible with the armv7k ABI.
define i32 @test_in_smallstruct_stack([8 x i64], i32, [3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_stack:
; CHECK: ldr w0, [sp, #4]
  %val = extractvalue [3 x i32] %in, 0
  ret i32 %val
}

define [2 x i32] @test_ret_smallstruct([3 x i32] %in) {
; CHECK-LABEL: test_ret_smallstruct:
; CHECK: mov x0, #1
; CHECK: movk x0, #2, lsl #32

  ret [2 x i32] [i32 1, i32 2]
}

declare void @smallstruct_callee([4 x i32])
define void @test_call_smallstruct() {
; CHECK-LABEL: test_call_smallstruct:
; CHECK: mov x0, #1
; CHECK: movk x0, #2, lsl #32
; CHECK: mov x1, #3
; CHECK: movk x1, #4, lsl #32
; CHECK: bl _smallstruct_callee

  call void @smallstruct_callee([4 x i32] [i32 1, i32 2, i32 3, i32 4])
  ret void
}

declare void @smallstruct_callee_stack([8 x i64], i32, [2 x i32])
define void @test_call_smallstruct_stack() {
; CHECK-LABEL: test_call_smallstruct_stack:
; CHECK: mov [[VAL:x[0-9]+]], #1
; CHECK: movk [[VAL]], #2, lsl #32
; CHECK: stur [[VAL]], [sp, #4]

  call void @smallstruct_callee_stack([8 x i64] undef, i32 undef, [2 x i32] [i32 1, i32 2])
  ret void
}

declare [3 x i32] @returns_smallstruct()
define i32 @test_use_smallstruct_low() {
; CHECK-LABEL: test_use_smallstruct_low:
; CHECK: bl _returns_smallstruct
; CHECK: mov x0, x1

  %struct = call [3 x i32] @returns_smallstruct()
  %val = extractvalue [3 x i32] %struct, 2
  ret i32 %val
}

define i32 @test_use_smallstruct_high() {
; CHECK-LABEL: test_use_smallstruct_high:
; CHECK: bl _returns_smallstruct
; CHECK: lsr x0, x0, #32

  %struct = call [3 x i32] @returns_smallstruct()
  %val = extractvalue [3 x i32] %struct, 1
  ret i32 %val
}

; If a small struct can't be allocated to x0-x7, the remaining registers should
; be marked as unavailable and subsequent GPR arguments should also be on the
; stack. Obviously the struct itself should be passed entirely on the stack.
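; Working through test_smallstruct_padding below: the [7 x i64] takes x0-x6,
; leaving only x7 free. The [4 x i32] needs two X registers, so it is passed
; entirely at [sp] and x7 is marked unavailable, which pushes %in out to
; [sp, #16].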
define i32 @test_smallstruct_padding([7 x i64], [4 x i32] %struct, i32 %in) {
; CHECK-LABEL: test_smallstruct_padding:
; CHECK-DAG: ldr [[IN:w[0-9]+]], [sp, #16]
; CHECK-DAG: ldr [[LHS:w[0-9]+]], [sp]
; CHECK: add w0, [[LHS]], [[IN]]
  %lhs = extractvalue [4 x i32] %struct, 0
  %sum = add i32 %lhs, %in
  ret i32 %sum
}

declare void @take_small_smallstruct(i64, [1 x i32])
define void @test_small_smallstruct() {
; CHECK-LABEL: test_small_smallstruct:
; CHECK-DAG: mov w0, #1
; CHECK-DAG: mov w1, #2
; CHECK: bl _take_small_smallstruct
  call void @take_small_smallstruct(i64 1, [1 x i32] [i32 2])
  ret void
}

define void @test_bare_frameaddr(i8** %addr) {
; CHECK-LABEL: test_bare_frameaddr:
; CHECK: add x[[LOCAL:[0-9]+]], sp, #{{[0-9]+}}
; CHECK: str w[[LOCAL]],

  %ptr = alloca i8
  store i8* %ptr, i8** %addr, align 4
  ret void
}

define void @test_sret_use([8 x i64]* sret([8 x i64]) %out) {
; CHECK-LABEL: test_sret_use:
; CHECK: str xzr, [x8]
  %addr = getelementptr [8 x i64], [8 x i64]* %out, i32 0, i32 0
  store i64 0, i64* %addr
  ret void
}

define i64 @test_sret_call() {
; CHECK-LABEL: test_sret_call:
; CHECK: mov x8, sp
; CHECK: bl _test_sret_use
  %arr = alloca [8 x i64]
  call void @test_sret_use([8 x i64]* sret([8 x i64]) %arr)

  %addr = getelementptr [8 x i64], [8 x i64]* %arr, i32 0, i32 0
  %val = load i64, i64* %addr
  ret i64 %val
}

define double @test_constpool() {
; CHECK-LABEL: test_constpool:
; CHECK: adrp x[[PAGE:[0-9]+]], [[POOL:lCPI[0-9]+_[0-9]+]]@PAGE
; CHECK: ldr d0, [x[[PAGE]], [[POOL]]@PAGEOFF]
  ret double 1.0e-6
}

define i8* @test_blockaddress() {
; CHECK-LABEL: test_blockaddress:
; CHECK: [[BLOCK:Ltmp[0-9]+]]:
; CHECK: adrp [[PAGE:x[0-9]+]], [[BLOCK]]@PAGE
; CHECK: add x0, [[PAGE]], [[BLOCK]]@PAGEOFF
  br label %dest
dest:
  ret i8* blockaddress(@test_blockaddress, %dest)
}

define i8* @test_indirectbr(i8* %dest) {
; CHECK-LABEL: test_indirectbr:
; CHECK: br x0
  indirectbr i8* %dest, [label %true, label %false]

true:
  ret i8* blockaddress(@test_indirectbr, %true)
false:
  ret i8* blockaddress(@test_indirectbr, %false)
}

; ISelDAGToDAG tries to fold an offset FI load (in this case var+4) into the
; actual load instruction. This needs to be done slightly carefully since we
; claim the FI in the process -- it doesn't need extending.
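; In test_frameindex_offset_load below, the 16-byte alloca sits at sp, so
; element 1 is at sp+4 and the offset folds straight into "ldr s0, [sp, #4]";
; frame-index arithmetic is known not to wrap, so no extension of the address
; is needed.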
define float @test_frameindex_offset_load() {
; CHECK-LABEL: test_frameindex_offset_load:
; CHECK: ldr s0, [sp, #4]
  %arr = alloca float, i32 4, align 8
  %addr = getelementptr inbounds float, float* %arr, i32 1

  %val = load float, float* %addr, align 4
  ret float %val
}

define void @test_unaligned_frameindex_offset_store() {
; CHECK-LABEL: test_unaligned_frameindex_offset_store:
; CHECK: mov x[[TMP:[0-9]+]], sp
; CHECK: orr w[[ADDR:[0-9]+]], w[[TMP]], #0x2
; CHECK: mov [[VAL:w[0-9]+]], #42
; CHECK: str [[VAL]], [x[[ADDR]]]
  %arr = alloca [4 x i32]

  %addr.int = ptrtoint [4 x i32]* %arr to i32
  %addr.nextint = add nuw i32 %addr.int, 2
  %addr.next = inttoptr i32 %addr.nextint to i32*
  store i32 42, i32* %addr.next
  ret void
}

define {i64, i64*} @test_pre_idx(i64* %addr) {
; CHECK-LABEL: test_pre_idx:

; CHECK: add w[[ADDR:[0-9]+]], w0, #8
; CHECK: ldr x0, [x[[ADDR]]]
  %addr.int = ptrtoint i64* %addr to i32
  %addr.next.int = add nuw i32 %addr.int, 8
  %addr.next = inttoptr i32 %addr.next.int to i64*
  %val = load i64, i64* %addr.next

  %tmp = insertvalue {i64, i64*} undef, i64 %val, 0
  %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1

  ret {i64, i64*} %res
}

; Forming a post-indexed load is invalid here since the GEP needs to work when
; %addr wraps round to 0.
define {i64, i64*} @test_invalid_pre_idx(i64* %addr) {
; CHECK-LABEL: test_invalid_pre_idx:
; CHECK: add w1, w0, #8
; CHECK: ldr x0, [x1]
  %addr.next = getelementptr i64, i64* %addr, i32 1
  %val = load i64, i64* %addr.next

  %tmp = insertvalue {i64, i64*} undef, i64 %val, 0
  %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1

  ret {i64, i64*} %res
}

declare void @callee([8 x i32]*)
define void @test_stack_guard() ssp {
; CHECK-LABEL: test_stack_guard:
; CHECK: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF]
; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]]
; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]]

; CHECK: add x0, sp, #{{[0-9]+}}
; CHECK: bl _callee

; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK-OPT: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF]
; CHECK-OPT: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]]
; CHECK-OPT: ldur [[NEW_VAL:w[0-9]+]], [x29, #[[GUARD_OFFSET]]]
; CHECK-OPT: cmp [[GUARD_VAL]], [[NEW_VAL]]
; CHECK-OPT: b.ne [[FAIL:LBB[0-9]+_[0-9]+]]

; CHECK-OPT: [[FAIL]]:
; CHECK-OPT-NEXT: bl ___stack_chk_fail
  %arr = alloca [8 x i32]
  call void @callee([8 x i32]* %arr)
  ret void
}

declare i32 @__gxx_personality_v0(...)
declare void @eat_landingpad_args(i32, i8*, i32)
@_ZTI8Whatever = external global i8
define void @test_landingpad_marshalling() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: test_landingpad_marshalling:
; CHECK-OPT: mov x2, x1
; CHECK-OPT: mov x1, x0
; CHECK: bl _eat_landingpad_args
  invoke void @callee([8 x i32]* undef) to label %done unwind label %lpad

lpad:                                             ; preds = %entry
  %exc = landingpad { i8*, i32 }
          catch i8* @_ZTI8Whatever
  %pointer = extractvalue { i8*, i32 } %exc, 0
  %selector = extractvalue { i8*, i32 } %exc, 1
  call void @eat_landingpad_args(i32 undef, i8* %pointer, i32 %selector)
  ret void

done:
  ret void
}

define void @test_dynamic_stackalloc() {
; CHECK-LABEL: test_dynamic_stackalloc:
; CHECK: sub [[REG:x[0-9]+]], sp, #32
; CHECK: mov sp, [[REG]]
; CHECK-OPT-NOT: ubfx
; CHECK: bl _callee
  br label %next

next:
  %val = alloca [8 x i32]
  call void @callee([8 x i32]* %val)
  ret void
}

define void @test_asm_memory(i32* %base.addr) {
; CHECK-LABEL: test_asm_memory:
; CHECK: add w[[ADDR:[0-9]+]], w0, #4
; CHECK: str wzr, [x[[ADDR]]]
  %addr = getelementptr i32, i32* %base.addr, i32 1
  call void asm sideeffect "str wzr, $0", "*m"(i32* %addr)
  ret void
}

define void @test_unsafe_asm_memory(i64 %val) {
; CHECK-LABEL: test_unsafe_asm_memory:
; CHECK: and x[[ADDR:[0-9]+]], x0, #0xffffffff
; CHECK: str wzr, [x[[ADDR]]]
  %addr_int = trunc i64 %val to i32
  %addr = inttoptr i32 %addr_int to i32*
  call void asm sideeffect "str wzr, $0", "*m"(i32* %addr)
  ret void
}

define [9 x i8*] @test_demoted_return(i8* %in) {
; CHECK-LABEL: test_demoted_return:
; CHECK: str w0, [x8, #32]
  %res = insertvalue [9 x i8*] undef, i8* %in, 8
  ret [9 x i8*] %res
}

define i8* @test_inttoptr(i64 %in) {
; CHECK-LABEL: test_inttoptr:
; CHECK: and x0, x0, #0xffffffff
  %res = inttoptr i64 %in to i8*
  ret i8* %res
}

declare i32 @llvm.get.dynamic.area.offset.i32()
define i32 @test_dynamic_area() {
; CHECK-LABEL: test_dynamic_area:
; CHECK: mov w0, wzr
  %res = call i32 @llvm.get.dynamic.area.offset.i32()
  ret i32 %res
}

define void @test_pointer_vec_store(<2 x i8*>* %addr) {
; CHECK-LABEL: test_pointer_vec_store:
; CHECK: str xzr, [x0]
; CHECK-NOT: str
; CHECK-NOT: stp

  store <2 x i8*> zeroinitializer, <2 x i8*>* %addr, align 16
  ret void
}

define <2 x i8*> @test_pointer_vec_load(<2 x i8*>* %addr) {
; CHECK-LABEL: test_pointer_vec_load:
; CHECK: ldr d[[TMP:[0-9]+]], [x0]
; CHECK: ushll.2d v0, v[[TMP]], #0
  %val = load <2 x i8*>, <2 x i8*>* %addr, align 16
  ret <2 x i8*> %val
}

define void @test_inline_asm_mem_pointer(i32* %in) {
; CHECK-LABEL: test_inline_asm_mem_pointer:
; CHECK: str w0,
  tail call void asm sideeffect "ldr x0, $0", "rm"(i32* %in)
  ret void
}

define void @test_struct_hi(i32 %hi) nounwind {
; CHECK-LABEL: test_struct_hi:
; CHECK: mov w[[IN:[0-9]+]], w0
; CHECK: bl _get_int
; CHECK-FAST-NEXT: mov w0, w0
; CHECK-NEXT: bfi x0, x[[IN]], #32, #32
; CHECK-NEXT: bl _take_pair
  %val.64 = call i64 @get_int()
  %val.32 = trunc i64 %val.64 to i32

  %pair.0 = insertvalue [2 x i32] undef, i32 %val.32, 0
  %pair.1 = insertvalue [2 x i32] %pair.0, i32 %hi, 1
  call void @take_pair([2 x i32] %pair.1)

  ret void
}
declare void @take_pair([2 x i32])
declare i64 @get_int()

define i1 @test_icmp_ptr(i8* %in) {
; CHECK-LABEL: test_icmp_ptr:
; CHECK: ubfx x0, x0, #31, #1
  %res = icmp slt i8* %in, null
  ret i1 %res
}

define void @test_multiple_icmp_ptr(i8* %l, i8* %r) {
; CHECK-LABEL: test_multiple_icmp_ptr:
; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]]
; CHECK: tbnz w1, #31, [[FALSEBB]]
  %tst1 = icmp sgt i8* %l, inttoptr (i32 -1 to i8*)
  %tst2 = icmp sgt i8* %r, inttoptr (i32 -1 to i8*)
  %tst = and i1 %tst1, %tst2
  br i1 %tst, label %true, label %false

true:
  call void(...) @bar()
  ret void

false:
  ret void
}

define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) {
; CHECK-LABEL: test_gep_nonpow2:
; CHECK-OPT: mov w[[SIZE:[0-9]+]], #18
; CHECK-OPT-NEXT: smaddl x0, w1, w[[SIZE]], x0
; CHECK-OPT-NEXT: ret

; CHECK-FAST: mov w[[SIZE:[0-9]+]], #18
; CHECK-FAST-NEXT: smaddl [[TMP:x[0-9]+]], w1, w[[SIZE]], x0
; CHECK-FAST-NEXT: and x0, [[TMP]], #0xffffffff
; CHECK-FAST-NEXT: ret
  %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1
  ret { [18 x i8] }* %tmp0
}

define void @test_bzero(i64 %in) {
; CHECK-LABEL: test_bzero:
; CHECK-DAG: lsr x1, x0, #32
; CHECK-DAG: and x0, x0, #0xffffffff
; CHECK: bl _bzero

  %ptr.i32 = trunc i64 %in to i32
  %size.64 = lshr i64 %in, 32
  %size = trunc i64 %size.64 to i32
  %ptr = inttoptr i32 %ptr.i32 to i8*
  tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 0, i32 %size, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)