; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=non-leaf | \
; RUN:     llvm-objdump --private-headers - | \
; RUN:     FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT
; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -fast-isel -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST

; CHECK-MACHO: Mach header
; CHECK-MACHO: MH_MAGIC ARM64_32 V8

@var64 = global i64 zeroinitializer, align 8
@var32 = global i32 zeroinitializer, align 4

@var_got = external global i8

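; On arm64_32, pointers are only 32 bits wide: the optimized path knows the
; ADRP/ADD result is already a valid 32-bit value, while FastISel
; conservatively masks it down.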
define i32* @test_global_addr() {
; CHECK-LABEL: test_global_addr:
; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE
; CHECK-OPT: add x0, [[PAGE]], _var32@PAGEOFF
; CHECK-FAST: add [[TMP:x[0-9]+]], [[PAGE]], _var32@PAGEOFF
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  ret i32* @var32
}

; ADRP is necessarily 64-bit. The important point to check is that, however
; the address gets truncated to 32 bits, the truncation is free. There is no
; need to zero out the high bits of the register.
define i64 @test_global_addr_extension() {
; CHECK-LABEL: test_global_addr_extension:
; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE
; CHECK: add x0, [[PAGE]], _var32@PAGEOFF
; CHECK-NOT: and
; CHECK: ret

  ret i64 ptrtoint(i32* @var32 to i64)
}

define i32 @test_global_value() {
; CHECK-LABEL: test_global_value:
; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE
; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF]
  %val = load i32, i32* @var32, align 4
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here.
define i32 @test_unsafe_indexed_add() {
; CHECK-LABEL: test_unsafe_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_32 = add i32 %addr_int, 32
  %addr = inttoptr i32 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

; Since we've promised there is no unsigned overflow, @var32 must be at least
; 32 bytes below 2^32, and we can use the load this time.
define i32 @test_safe_indexed_add() {
; CHECK-LABEL: test_safe_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i64
  %addr_plus_32 = add nuw i64 %addr_int, 32
  %addr = inttoptr i64 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

define i32 @test_safe_indexed_or(i32 %in) {
; CHECK-LABEL: test_safe_indexed_or:
; CHECK: and [[TMP:w[0-9]+]], {{w[0-9]+}}, #0xfffffff0
; CHECK: orr w[[ADDR:[0-9]+]], [[TMP]], #0x4
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = and i32 %in, -16
  %addr_plus_4 = or i32 %addr_int, 4
  %addr = inttoptr i32 %addr_plus_4 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}


; Promising nsw is not sufficient because the addressing mode basically
; calculates "zext(base) + zext(offset)" and nsw only guarantees
; "sext(base) + sext(offset) == base + offset".
define i32 @test_unsafe_nsw_indexed_add() {
; CHECK-LABEL: test_unsafe_nsw_indexed_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
; CHECK-NOT: ubfx
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_32 = add nsw i32 %addr_int, 32
  %addr = inttoptr i32 %addr_plus_32 to i32*
  %val = load i32, i32* %addr, align 4
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here.
define i32 @test_unsafe_unscaled_add() {
; CHECK-LABEL: test_unsafe_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Since we've promised there is no unsigned overflow, @var32 must be at least
; 32 bytes below 2^32, and we can use the load this time.
define i32 @test_safe_unscaled_add() {
; CHECK-LABEL: test_safe_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add nuw i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Promising nsw is not sufficient because the addressing mode basically
; calculates "zext(base) + zext(offset)" and nsw only guarantees
; "sext(base) + sext(offset) == base + offset".
define i32 @test_unsafe_nsw_unscaled_add() {
; CHECK-LABEL: test_unsafe_nsw_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK-NOT: ubfx
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_plus_3 = add nsw i32 %addr_int, 3
  %addr = inttoptr i32 %addr_plus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

; Because the addition may wrap, it is not safe to use "ldur w0, [xN, #-3]"
; here.
define i32 @test_unsafe_negative_unscaled_add() {
; CHECK-LABEL: test_unsafe_negative_unscaled_add:
; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
; CHECK: sub w[[ADDR:[0-9]+]], w[[VAR32]], #3
; CHECK: ldr w0, [x[[ADDR]]]
  %addr_int = ptrtoint i32* @var32 to i32
  %addr_minus_3 = add i32 %addr_int, -3
  %addr = inttoptr i32 %addr_minus_3 to i32*
  %val = load i32, i32* %addr, align 1
  ret i32 %val
}

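; GOT entries are only 32 bits wide on arm64_32, hence the w-register load.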
define i8* @test_got_addr() {
; CHECK-LABEL: test_got_addr:
; CHECK: adrp x[[PAGE:[0-9]+]], _var_got@GOTPAGE
; CHECK-OPT: ldr w0, [x[[PAGE]], _var_got@GOTPAGEOFF]
; CHECK-FAST: ldr w[[TMP:[0-9]+]], [x[[PAGE]], _var_got@GOTPAGEOFF]
; CHECK-FAST: and x0, x[[TMP]], #0xffffffff
  ret i8* @var_got
}

define float @test_va_arg_f32(i8** %list) {
; CHECK-LABEL: test_va_arg_f32:

; CHECK: ldr w[[START:[0-9]+]], [x0]
; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #8
; CHECK: str [[AFTER]], [x0]

  ; Floating point arguments get promoted to double as per C99.
; CHECK: ldr [[DBL:d[0-9]+]], [x[[START]]]
; CHECK: fcvt s0, [[DBL]]
  %res = va_arg i8** %list, float
  ret float %res
}

; The interesting point is that the slot is 4 bytes.
define i8 @test_va_arg_i8(i8** %list) {
; CHECK-LABEL: test_va_arg_i8:

; CHECK: ldr w[[START:[0-9]+]], [x0]
; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #4
; CHECK: str [[AFTER]], [x0]

  ; i8 gets promoted to int (again, as per C99).
; CHECK: ldr w0, [x[[START]]]

  %res = va_arg i8** %list, i8
  ret i8 %res
}

; The interesting point is that the slot needs aligning (again, the minimum
; slot size is 4 bytes).
define i64 @test_va_arg_i64(i64** %list) {
; CHECK-LABEL: test_va_arg_i64:

  ; Update the list for the next user (the minimum slot size is 4 bytes, but
  ; the actual argument is 8 bytes, which had better be reflected!)
; CHECK: ldr w[[UNALIGNED_START:[0-9]+]], [x0]
; CHECK: add [[ALIGN_TMP:x[0-9]+]], x[[UNALIGNED_START]], #7
; CHECK: and x[[START:[0-9]+]], [[ALIGN_TMP]], #0x1fffffff8
; CHECK: add w[[AFTER:[0-9]+]], w[[START]], #8
; CHECK: str w[[AFTER]], [x0]

; CHECK: ldr x0, [x[[START]]]

  %res = va_arg i64** %list, i64
  ret i64 %res
}

declare void @bar(...)
define void @test_va_call(i8 %l, i8 %r, float %in, i8* %ptr) {
; CHECK-LABEL: test_va_call:
; CHECK: add [[SUM:w[0-9]+]], {{w[0-9]+}}, w1

; CHECK-DAG: str w2, [sp, #32]
; CHECK-DAG: str xzr, [sp, #24]
; CHECK-DAG: str s0, [sp, #16]
; CHECK-DAG: str xzr, [sp, #8]
; CHECK-DAG: str [[SUM]], [sp]

  ; Add them to ensure real promotion occurs.
  %sum = add i8 %l, %r
  call void(...) @bar(i8 %sum, i64 0, float %in, double 0.0, i8* %ptr)
  ret void
}

declare i8* @llvm.frameaddress(i32)

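; llvm.frameaddress(1) chases the frame-record chain through x29; again, only
; FastISel needs to mask the loaded value down to the 32-bit pointer width.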
define i8* @test_frameaddr() {
; CHECK-LABEL: test_frameaddr:
; CHECK-OPT: ldr x0, [x29]
; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x29]
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  %val = call i8* @llvm.frameaddress(i32 1)
  ret i8* %val
}

declare i8* @llvm.returnaddress(i32)

define i8* @test_toplevel_returnaddr() {
; CHECK-LABEL: test_toplevel_returnaddr:
; CHECK-OPT: mov x0, x30
; CHECK-FAST: and x0, x30, #0xffffffff
  %val = call i8* @llvm.returnaddress(i32 0)
  ret i8* %val
}

define i8* @test_deep_returnaddr() {
; CHECK-LABEL: test_deep_returnaddr:
; CHECK: ldr x[[FRAME_REC:[0-9]+]], [x29]
; CHECK-OPT: ldr x30, [x[[FRAME_REC]], #8]
; CHECK-OPT: hint #7
; CHECK-OPT: mov x0, x30
; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x[[FRAME_REC]], #8]
; CHECK-FAST: and x0, [[TMP]], #0xffffffff
  %val = call i8* @llvm.returnaddress(i32 1)
  ret i8* %val
}

define void @test_indirect_call(void()* %func) {
; CHECK-LABEL: test_indirect_call:
; CHECK: blr x0
  call void() %func()
  ret void
}

; Safe to use the unextended address here
define void @test_indirect_safe_call(i32* %weird_funcs) {
; CHECK-LABEL: test_indirect_safe_call:
; CHECK: add w[[ADDR32:[0-9]+]], w0, #4
; CHECK-OPT-NOT: ubfx
; CHECK: blr x[[ADDR32]]
  %addr = getelementptr i32, i32* %weird_funcs, i32 1
  %func = bitcast i32* %addr to void()*
  call void() %func()
  ret void
}

declare void @simple()
define void @test_simple_tail_call() {
; CHECK-LABEL: test_simple_tail_call:
; CHECK: b _simple
  tail call void @simple()
  ret void
}

define void @test_indirect_tail_call(void()* %func) {
; CHECK-LABEL: test_indirect_tail_call:
; CHECK: br x0
  tail call void() %func()
  ret void
}

; Safe to use the unextended address here
define void @test_indirect_safe_tail_call(i32* %weird_funcs) {
; CHECK-LABEL: test_indirect_safe_tail_call:
; CHECK: add w[[ADDR32:[0-9]+]], w0, #4
; CHECK-OPT-NOT: ubfx
; CHECK-OPT: br x[[ADDR32]]
  %addr = getelementptr i32, i32* %weird_funcs, i32 1
  %func = bitcast i32* %addr to void()*
  tail call void() %func()
  ret void
}

; For the "armv7k" slice, Clang emits some small structs as [N x i32]. For ABI
; compatibility with arm64_32 these need to be passed in *X* registers (e.g.
; [2 x i32] is packed into a single register).

define i32 @test_in_smallstruct_low([3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_low:
; CHECK: mov x0, x1
  %val = extractvalue [3 x i32] %in, 2
  ret i32 %val
}

define i32 @test_in_smallstruct_high([3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_high:
; CHECK: lsr x0, x0, #32
  %val = extractvalue [3 x i32] %in, 1
  ret i32 %val
}

; The 64-bit DarwinPCS ABI has the quirk that structs on the stack are always
; 64-bit aligned. This must not happen for arm64_32 since otherwise va_arg will
; be incompatible with the armv7k ABI.
define i32 @test_in_smallstruct_stack([8 x i64], i32, [3 x i32] %in) {
; CHECK-LABEL: test_in_smallstruct_stack:
; CHECK: ldr w0, [sp, #4]
  %val = extractvalue [3 x i32] %in, 0
  ret i32 %val
}

define [2 x i32] @test_ret_smallstruct([3 x i32] %in) {
; CHECK-LABEL: test_ret_smallstruct:
; CHECK: mov x0, #1
; CHECK: movk x0, #2, lsl #32

  ret [2 x i32] [i32 1, i32 2]
}

declare void @smallstruct_callee([4 x i32])
define void @test_call_smallstruct() {
; CHECK-LABEL: test_call_smallstruct:
; CHECK: mov x0, #1
; CHECK: movk x0, #2, lsl #32
; CHECK: mov x1, #3
; CHECK: movk x1, #4, lsl #32
; CHECK: bl _smallstruct_callee

  call void @smallstruct_callee([4 x i32] [i32 1, i32 2, i32 3, i32 4])
  ret void
}

declare void @smallstruct_callee_stack([8 x i64], i32, [2 x i32])
define void @test_call_smallstruct_stack() {
; CHECK-LABEL: test_call_smallstruct_stack:
; CHECK: mov [[VAL:x[0-9]+]], #1
; CHECK: movk [[VAL]], #2, lsl #32
; CHECK: stur [[VAL]], [sp, #4]

  call void @smallstruct_callee_stack([8 x i64] undef, i32 undef, [2 x i32] [i32 1, i32 2])
  ret void
}

declare [3 x i32] @returns_smallstruct()
define i32 @test_use_smallstruct_low() {
; CHECK-LABEL: test_use_smallstruct_low:
; CHECK: bl _returns_smallstruct
; CHECK: mov x0, x1

  %struct = call [3 x i32] @returns_smallstruct()
  %val = extractvalue [3 x i32] %struct, 2
  ret i32 %val
}

define i32 @test_use_smallstruct_high() {
; CHECK-LABEL: test_use_smallstruct_high:
; CHECK: bl _returns_smallstruct
; CHECK: lsr x0, x0, #32

  %struct = call [3 x i32] @returns_smallstruct()
  %val = extractvalue [3 x i32] %struct, 1
  ret i32 %val
}

; If a small struct can't be allocated to x0-x7, the remaining registers should
; be marked as unavailable and subsequent GPR arguments should also be on the
; stack. Obviously the struct itself should be passed entirely on the stack.
define i32 @test_smallstruct_padding([7 x i64], [4 x i32] %struct, i32 %in) {
; CHECK-LABEL: test_smallstruct_padding:
; CHECK-DAG: ldr [[IN:w[0-9]+]], [sp, #16]
; CHECK-DAG: ldr [[LHS:w[0-9]+]], [sp]
; CHECK: add w0, [[LHS]], [[IN]]
  %lhs = extractvalue [4 x i32] %struct, 0
  %sum = add i32 %lhs, %in
  ret i32 %sum
}

declare void @take_small_smallstruct(i64, [1 x i32])
define void @test_small_smallstruct() {
; CHECK-LABEL: test_small_smallstruct:
; CHECK-DAG: mov w0, #1
; CHECK-DAG: mov w1, #2
; CHECK: bl _take_small_smallstruct
  call void @take_small_smallstruct(i64 1, [1 x i32] [i32 2])
  ret void
}

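; Only the low 32 bits of the pointer to the local get stored.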
define void @test_bare_frameaddr(i8** %addr) {
; CHECK-LABEL: test_bare_frameaddr:
; CHECK: add x[[LOCAL:[0-9]+]], sp, #{{[0-9]+}}
; CHECK: str w[[LOCAL]],

  %ptr = alloca i8
  store i8* %ptr, i8** %addr, align 4
  ret void
}

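; As on arm64, the sret pointer arrives in x8.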
define void @test_sret_use([8 x i64]* sret([8 x i64]) %out) {
; CHECK-LABEL: test_sret_use:
; CHECK: str xzr, [x8]
  %addr = getelementptr [8 x i64], [8 x i64]* %out, i32 0, i32 0
  store i64 0, i64* %addr
  ret void
}

define i64 @test_sret_call() {
; CHECK-LABEL: test_sret_call:
; CHECK: mov x8, sp
; CHECK: bl _test_sret_use
  %arr = alloca [8 x i64]
  call void @test_sret_use([8 x i64]* sret([8 x i64]) %arr)

  %addr = getelementptr [8 x i64], [8 x i64]* %arr, i32 0, i32 0
  %val = load i64, i64* %addr
  ret i64 %val
}

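; Constant-pool entries are addressed with the same ADRP/PAGEOFF sequence as
; globals.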
define double @test_constpool() {
; CHECK-LABEL: test_constpool:
; CHECK: adrp x[[PAGE:[0-9]+]], [[POOL:lCPI[0-9]+_[0-9]+]]@PAGE
; CHECK: ldr d0, [x[[PAGE]], [[POOL]]@PAGEOFF]
  ret double 1.0e-6
}

define i8* @test_blockaddress() {
; CHECK-LABEL: test_blockaddress:
; CHECK: [[BLOCK:Ltmp[0-9]+]]:
; CHECK: adrp [[PAGE:x[0-9]+]], [[BLOCK]]@PAGE
; CHECK: add x0, [[PAGE]], [[BLOCK]]@PAGEOFF
  br label %dest
dest:
  ret i8* blockaddress(@test_blockaddress, %dest)
}

define i8* @test_indirectbr(i8* %dest) {
; CHECK-LABEL: test_indirectbr:
; CHECK: br x0
  indirectbr i8* %dest, [label %true, label %false]

true:
  ret i8* blockaddress(@test_indirectbr, %true)
false:
  ret i8* blockaddress(@test_indirectbr, %false)
}

; ISelDAGToDAG tries to fold an offset FI load (in this case var+4) into the
; actual load instruction. This needs to be done slightly carefully since we
; claim the FI in the process -- it doesn't need extending.
define float @test_frameindex_offset_load() {
; CHECK-LABEL: test_frameindex_offset_load:
; CHECK: ldr s0, [sp, #4]
  %arr = alloca float, i32 4, align 8
  %addr = getelementptr inbounds float, float* %arr, i32 1

  %val = load float, float* %addr, align 4
  ret float %val
}

define void @test_unaligned_frameindex_offset_store() {
; CHECK-LABEL: test_unaligned_frameindex_offset_store:
; CHECK: mov x[[TMP:[0-9]+]], sp
; CHECK: orr w[[ADDR:[0-9]+]], w[[TMP]], #0x2
; CHECK: mov [[VAL:w[0-9]+]], #42
; CHECK: str [[VAL]], [x[[ADDR]]]
  %arr = alloca [4 x i32]

  %addr.int = ptrtoint [4 x i32]* %arr to i32
  %addr.nextint = add nuw i32 %addr.int, 2
  %addr.next = inttoptr i32 %addr.nextint to i32*
  store i32 42, i32* %addr.next
  ret void
}


define {i64, i64*} @test_pre_idx(i64* %addr) {
; CHECK-LABEL: test_pre_idx:

; CHECK: add w[[ADDR:[0-9]+]], w0, #8
; CHECK: ldr x0, [x[[ADDR]]]
  %addr.int = ptrtoint i64* %addr to i32
  %addr.next.int = add nuw i32 %addr.int, 8
  %addr.next = inttoptr i32 %addr.next.int to i64*
  %val = load i64, i64* %addr.next

  %tmp = insertvalue {i64, i64*} undef, i64 %val, 0
  %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1

  ret {i64, i64*} %res
}

; Forming a pre-indexed load is invalid here since the GEP needs to work when
; %addr wraps round to 0.
define {i64, i64*} @test_invalid_pre_idx(i64* %addr) {
; CHECK-LABEL: test_invalid_pre_idx:
; CHECK: add w1, w0, #8
; CHECK: ldr x0, [x1]
  %addr.next = getelementptr i64, i64* %addr, i32 1
  %val = load i64, i64* %addr.next

  %tmp = insertvalue {i64, i64*} undef, i64 %val, 0
  %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1

  ret {i64, i64*} %res
}

declare void @callee([8 x i32]*)
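; The stack guard is reached through a 32-bit GOT entry, like any other global.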
define void @test_stack_guard() ssp {
; CHECK-LABEL: test_stack_guard:
; CHECK: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF]
; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]]
; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]]

; CHECK: add x0, sp, #{{[0-9]+}}
; CHECK: bl _callee

; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK-OPT: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF]
; CHECK-OPT: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]]
; CHECK-OPT: ldur [[NEW_VAL:w[0-9]+]], [x29, #[[GUARD_OFFSET]]]
; CHECK-OPT: cmp [[GUARD_VAL]], [[NEW_VAL]]
; CHECK-OPT: b.ne [[FAIL:LBB[0-9]+_[0-9]+]]

; CHECK-OPT: [[FAIL]]:
; CHECK-OPT-NEXT: bl ___stack_chk_fail
  %arr = alloca [8 x i32]
  call void @callee([8 x i32]* %arr)
  ret void
}

declare i32 @__gxx_personality_v0(...)
declare void @eat_landingpad_args(i32, i8*, i32)
@_ZTI8Whatever = external global i8
define void @test_landingpad_marshalling() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: test_landingpad_marshalling:
; CHECK-OPT: mov x2, x1
; CHECK-OPT: mov x1, x0
; CHECK: bl _eat_landingpad_args
  invoke void @callee([8 x i32]* undef) to label %done unwind label %lpad

lpad:                                             ; preds = %entry
  %exc = landingpad { i8*, i32 }
          catch i8* @_ZTI8Whatever
  %pointer = extractvalue { i8*, i32 } %exc, 0
  %selector = extractvalue { i8*, i32 } %exc, 1
  call void @eat_landingpad_args(i32 undef, i8* %pointer, i32 %selector)
  ret void

done:
  ret void
}

define void @test_dynamic_stackalloc() {
; CHECK-LABEL: test_dynamic_stackalloc:
; CHECK: sub [[REG:x[0-9]+]], sp, #32
; CHECK: mov sp, [[REG]]
; CHECK-OPT-NOT: ubfx
; CHECK: bl _callee
  br label %next

next:
  %val = alloca [8 x i32]
  call void @callee([8 x i32]* %val)
  ret void
}

define void @test_asm_memory(i32* %base.addr) {
; CHECK-LABEL: test_asm_memory:
; CHECK: add w[[ADDR:[0-9]+]], w0, #4
; CHECK: str wzr, [x[[ADDR]]]
  %addr = getelementptr i32, i32* %base.addr, i32 1
  call void asm sideeffect "str wzr, $0", "*m"(i32* %addr)
  ret void
}

define void @test_unsafe_asm_memory(i64 %val) {
; CHECK-LABEL: test_unsafe_asm_memory:
; CHECK: and x[[ADDR:[0-9]+]], x0, #0xffffffff
; CHECK: str wzr, [x[[ADDR]]]
  %addr_int = trunc i64 %val to i32
  %addr = inttoptr i32 %addr_int to i32*
  call void asm sideeffect "str wzr, $0", "*m"(i32* %addr)
  ret void
}

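; The return value is demoted to an sret-style pointer in x8. Pointer elements
; occupy 4-byte slots, so element 8 lives at offset 32.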
define [9 x i8*] @test_demoted_return(i8* %in) {
; CHECK-LABEL: test_demoted_return:
; CHECK: str w0, [x8, #32]
  %res = insertvalue [9 x i8*] undef, i8* %in, 8
  ret [9 x i8*] %res
}

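; Converting an i64 to a pointer must explicitly drop the high 32 bits.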
define i8* @test_inttoptr(i64 %in) {
; CHECK-LABEL: test_inttoptr:
; CHECK: and x0, x0, #0xffffffff
  %res = inttoptr i64 %in to i8*
  ret i8* %res
}

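; The dynamic area offset is simply zero here.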
declare i32 @llvm.get.dynamic.area.offset.i32()
define i32 @test_dynamic_area() {
; CHECK-LABEL: test_dynamic_area:
; CHECK: mov w0, wzr
  %res = call i32 @llvm.get.dynamic.area.offset.i32()
  ret i32 %res
}

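; A <2 x i8*> vector occupies just 8 bytes on arm64_32, so a single 64-bit
; store must be emitted: no second str, and no stp.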
define void @test_pointer_vec_store(<2 x i8*>* %addr) {
; CHECK-LABEL: test_pointer_vec_store:
; CHECK: str xzr, [x0]
; CHECK-NOT: str
; CHECK-NOT: stp

  store <2 x i8*> zeroinitializer, <2 x i8*>* %addr, align 16
  ret void
}

define <2 x i8*> @test_pointer_vec_load(<2 x i8*>* %addr) {
; CHECK-LABEL: test_pointer_vec_load:
; CHECK: ldr d[[TMP:[0-9]+]], [x0]
; CHECK: ushll.2d v0, v[[TMP]], #0
  %val = load <2 x i8*>, <2 x i8*>* %addr, align 16
  ret <2 x i8*> %val
}

define void @test_inline_asm_mem_pointer(i32* %in) {
; CHECK-LABEL: test_inline_asm_mem_pointer:
; CHECK: str w0,
  tail call void asm sideeffect "ldr x0, $0", "rm"(i32* %in)
  ret void
}


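; The two i32 halves of the [2 x i32] argument are packed into x0 with bfi;
; FastISel needs an explicit zero-extension (mov w0, w0) of the truncated
; value first.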
define void @test_struct_hi(i32 %hi) nounwind {
; CHECK-LABEL: test_struct_hi:
; CHECK: mov w[[IN:[0-9]+]], w0
; CHECK: bl _get_int
; CHECK-FAST-NEXT: mov w0, w0
; CHECK-NEXT: bfi x0, x[[IN]], #32, #32
; CHECK-NEXT: bl _take_pair
  %val.64 = call i64 @get_int()
  %val.32 = trunc i64 %val.64 to i32

  %pair.0 = insertvalue [2 x i32] undef, i32 %val.32, 0
  %pair.1 = insertvalue [2 x i32] %pair.0, i32 %hi, 1
  call void @take_pair([2 x i32] %pair.1)

  ret void
}
declare void @take_pair([2 x i32])
declare i64 @get_int()

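; A signed comparison of a 32-bit pointer against null reduces to testing
; bit 31.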
define i1 @test_icmp_ptr(i8* %in) {
; CHECK-LABEL: test_icmp_ptr:
; CHECK: ubfx x0, x0, #31, #1
  %res = icmp slt i8* %in, null
  ret i1 %res
}

define void @test_multiple_icmp_ptr(i8* %l, i8* %r) {
; CHECK-LABEL: test_multiple_icmp_ptr:
; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]]
; CHECK: tbnz w1, #31, [[FALSEBB]]
  %tst1 = icmp sgt i8* %l, inttoptr (i32 -1 to i8*)
  %tst2 = icmp sgt i8* %r, inttoptr (i32 -1 to i8*)
  %tst = and i1 %tst1, %tst2
  br i1 %tst, label %true, label %false

true:
  call void(...) @bar()
  ret void

false:
  ret void
}

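; GEPs with a non-power-of-2 element size (here 18 bytes) use smaddl on the
; 32-bit index; FastISel again masks the result down to 32 bits.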
define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) {
; CHECK-LABEL: test_gep_nonpow2:
; CHECK-OPT:      mov w[[SIZE:[0-9]+]], #18
; CHECK-OPT-NEXT: smaddl x0, w1, w[[SIZE]], x0
; CHECK-OPT-NEXT: ret

; CHECK-FAST:      mov w[[SIZE:[0-9]+]], #18
; CHECK-FAST-NEXT: smaddl [[TMP:x[0-9]+]], w1, w[[SIZE]], x0
; CHECK-FAST-NEXT: and x0, [[TMP]], #0xffffffff
; CHECK-FAST-NEXT: ret
  %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1
  ret { [18 x i8] }* %tmp0
}

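; On Darwin, a memset to zero becomes a call to bzero; the pointer and size
; arguments are unpacked from the low and high halves of %in.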
define void @test_bzero(i64 %in) {
; CHECK-LABEL: test_bzero:
; CHECK-DAG: lsr x1, x0, #32
; CHECK-DAG: and x0, x0, #0xffffffff
; CHECK: bl _bzero

  %ptr.i32 = trunc i64 %in to i32
  %size.64 = lshr i64 %in, 32
  %size = trunc i64 %size.64 to i32
  %ptr = inttoptr i32 %ptr.i32 to i8*
  tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 0, i32 %size, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)