1; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
2; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck %s
3; Shrink wrapping currently does not kick in because we have a TLS CALL
4; in the entry block and it will clobber the link register.
5
6; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s
7
; Shared type, globals and external declarations used by the test functions
; defined later in this file.
;
; %struct.S is a one-byte aggregate; @sg is the thread-local instance whose
; address @_ZTW2sg returns.
8%struct.S = type { i8 }
9
10@sg = internal thread_local global %struct.S zeroinitializer, align 1
; Passed as the last argument of every @_tlv_atexit call in this file.
11@__dso_handle = external global i8
; Thread-local i1 flag that @_ZTW2sg loads, tests, and sets to true before it
; runs the one-time initialisation of @sg.
12@__tls_guard = internal thread_local unnamed_addr global i1 false
; Thread-local i32 whose address @_ZTW4sum1 returns.
13@sum1 = internal thread_local global i32 0, align 4
14
; External functions taking and returning a %struct.S* (the argument is marked
; `returned`). The names are the Itanium manglings of S::S() and S::~S().
15declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
16declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
; External registration function called with (function pointer, object
; pointer, @__dso_handle) by @_ZTW2sg and @__tls_test.
17declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
18
; Wrapper for the thread-local @sg, using the cxx_fast_tlscc calling
; convention. It loads @__tls_guard: when the flag is already true it branches
; straight to the exit block; otherwise it sets the flag, calls @_ZN1SC1Ev on
; @sg, registers @_ZN1SD1Ev for @sg through @_tlv_atexit, and then reaches the
; same exit block. Always returns the address of @sg.
19define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
  ; Read the guard flag; true means the initialisation has already run.
20  %.b.i = load i1, i1* @__tls_guard, align 1
21  br i1 %.b.i, label %__tls_init.exit, label %init.i
22
23init.i:
  ; Mark the guard before running the initialisation calls.
24  store i1 true, i1* @__tls_guard, align 1
25  %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg)
  ; Register @_ZN1SD1Ev (bitcast to void (i8*)*) together with the address of
  ; @sg's first field and @__dso_handle.
26  %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle)
27  br label %__tls_init.exit
28
29__tls_init.exit:
  ; Both paths end here and return the address of @sg.
30  ret %struct.S* @sg
31}
32
33; CHECK-LABEL: _ZTW2sg
34; CHECK-NOT: stp d31, d30
35; CHECK-NOT: stp d29, d28
36; CHECK-NOT: stp d27, d26
37; CHECK-NOT: stp d25, d24
38; CHECK-NOT: stp d23, d22
39; CHECK-NOT: stp d21, d20
40; CHECK-NOT: stp d19, d18
41; CHECK-NOT: stp d17, d16
42; CHECK-NOT: stp d7, d6
43; CHECK-NOT: stp d5, d4
44; CHECK-NOT: stp d3, d2
45; CHECK-NOT: stp d1, d0
46; CHECK-NOT: stp x20, x19
47; FIXME: The splitting logic in the register allocator fails to split along
48;        control flow here, we used to get this right by accident before...
49; CHECK-NOTXX: stp x14, x13
50; CHECK-NOT: stp x12, x11
51; CHECK-NOT: stp x10, x9
52; CHECK-NOT: stp x8, x7
53; CHECK-NOT: stp x6, x5
54; CHECK-NOT: stp x4, x3
55; CHECK-NOT: stp x2, x1
56; CHECK: blr
57; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
58; CHECK: blr
59; CHECK: tlv_atexit
60; CHECK: [[BB_end]]:
61; CHECK: blr
62; CHECK-NOT: ldp x2, x1
63; CHECK-NOT: ldp x4, x3
64; CHECK-NOT: ldp x6, x5
65; CHECK-NOT: ldp x8, x7
66; CHECK-NOT: ldp x10, x9
67; CHECK-NOT: ldp x12, x11
68; CHECK-NOTXX: ldp x14, x13
69; CHECK-NOT: ldp x20, x19
70; CHECK-NOT: ldp d1, d0
71; CHECK-NOT: ldp d3, d2
72; CHECK-NOT: ldp d5, d4
73; CHECK-NOT: ldp d7, d6
74; CHECK-NOT: ldp d17, d16
75; CHECK-NOT: ldp d19, d18
76; CHECK-NOT: ldp d21, d20
77; CHECK-NOT: ldp d23, d22
78; CHECK-NOT: ldp d25, d24
79; CHECK-NOT: ldp d27, d26
80; CHECK-NOT: ldp d29, d28
81; CHECK-NOT: ldp d31, d30
82
83; CHECK-O0-LABEL: _ZTW2sg
84; CHECK-O0: stp d31, d30
85; CHECK-O0: stp d29, d28
86; CHECK-O0: stp d27, d26
87; CHECK-O0: stp d25, d24
88; CHECK-O0: stp d23, d22
89; CHECK-O0: stp d21, d20
90; CHECK-O0: stp d19, d18
91; CHECK-O0: stp d17, d16
92; CHECK-O0: stp d7, d6
93; CHECK-O0: stp d5, d4
94; CHECK-O0: stp d3, d2
95; CHECK-O0: stp d1, d0
96; CHECK-O0: stp x14, x13
97; CHECK-O0: stp x12, x11
98; CHECK-O0: stp x10, x9
99; CHECK-O0: stp x8, x7
100; CHECK-O0: stp x6, x5
101; CHECK-O0: stp x4, x3
102; CHECK-O0: stp x2, x1
103; CHECK-O0: blr
104; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
105; CHECK-O0: blr
106; CHECK-O0: tlv_atexit
107; CHECK-O0: [[BB_end]]:
108; CHECK-O0: blr
109; CHECK-O0: ldp x2, x1
110; CHECK-O0: ldp x4, x3
111; CHECK-O0: ldp x6, x5
112; CHECK-O0: ldp x8, x7
113; CHECK-O0: ldp x10, x9
114; CHECK-O0: ldp x12, x11
115; CHECK-O0: ldp x14, x13
116; CHECK-O0: ldp d1, d0
117; CHECK-O0: ldp d3, d2
118; CHECK-O0: ldp d5, d4
119; CHECK-O0: ldp d7, d6
120; CHECK-O0: ldp d17, d16
121; CHECK-O0: ldp d19, d18
122; CHECK-O0: ldp d21, d20
123; CHECK-O0: ldp d23, d22
124; CHECK-O0: ldp d25, d24
125; CHECK-O0: ldp d27, d26
126; CHECK-O0: ldp d29, d28
127; CHECK-O0: ldp d31, d30
128
129; CHECK-LABEL: _ZTW4sum1
130; CHECK-NOT: stp d31, d30
131; CHECK-NOT: stp d29, d28
132; CHECK-NOT: stp d27, d26
133; CHECK-NOT: stp d25, d24
134; CHECK-NOT: stp d23, d22
135; CHECK-NOT: stp d21, d20
136; CHECK-NOT: stp d19, d18
137; CHECK-NOT: stp d17, d16
138; CHECK-NOT: stp d7, d6
139; CHECK-NOT: stp d5, d4
140; CHECK-NOT: stp d3, d2
141; CHECK-NOT: stp d1, d0
142; CHECK-NOT: stp x20, x19
143; CHECK-NOT: stp x14, x13
144; CHECK-NOT: stp x12, x11
145; CHECK-NOT: stp x10, x9
146; CHECK-NOT: stp x8, x7
147; CHECK-NOT: stp x6, x5
148; CHECK-NOT: stp x4, x3
149; CHECK-NOT: stp x2, x1
150; CHECK: blr
151
152; CHECK-O0-LABEL: _ZTW4sum1
153; CHECK-O0-NOT: vstr
154; CHECK-O0-NOT: vldr
; Wrapper for the thread-local @sum1, using the cxx_fast_tlscc calling
; convention. The body contains no explicit call and only returns the address
; of @sum1.
; NOTE(review): the two negative -O0 patterns directly above look for
; vstr/vldr, whereas every other register-save pattern in this file uses
; stp/ldp. They look like 32-bit ARM mnemonics; confirm whether they can ever
; match AArch64 output or whether stp/ldp patterns were intended.
155define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
156  ret i32* @sum1
157}
158
159; Make sure at O0, we don't generate spilling/reloading of the CSRs.
160; CHECK-O0-LABEL: tls_test2
161; CHECK-O0-NOT: stp d31, d30
162; CHECK-O0-NOT: stp d29, d28
163; CHECK-O0-NOT: stp d27, d26
164; CHECK-O0-NOT: stp d25, d24
165; CHECK-O0-NOT: stp d23, d22
166; CHECK-O0-NOT: stp d21, d20
167; CHECK-O0-NOT: stp d19, d18
168; CHECK-O0-NOT: stp d17, d16
169; CHECK-O0-NOT: stp d7, d6
170; CHECK-O0-NOT: stp d5, d4
171; CHECK-O0-NOT: stp d3, d2
172; CHECK-O0-NOT: stp d1, d0
173; CHECK-O0-NOT: stp x20, x19
174; CHECK-O0-NOT: stp x14, x13
175; CHECK-O0-NOT: stp x12, x11
176; CHECK-O0-NOT: stp x10, x9
177; CHECK-O0-NOT: stp x8, x7
178; CHECK-O0-NOT: stp x6, x5
179; CHECK-O0-NOT: stp x4, x3
180; CHECK-O0-NOT: stp x2, x1
181; CHECK-O0: bl {{.*}}tls_helper
182; CHECK-O0-NOT: ldp x2, x1
183; CHECK-O0-NOT: ldp x4, x3
184; CHECK-O0-NOT: ldp x6, x5
185; CHECK-O0-NOT: ldp x8, x7
186; CHECK-O0-NOT: ldp x10, x9
187; CHECK-O0-NOT: ldp x12, x11
188; CHECK-O0-NOT: ldp x14, x13
189; CHECK-O0-NOT: ldp x20, x19
190; CHECK-O0-NOT: ldp d1, d0
191; CHECK-O0-NOT: ldp d3, d2
192; CHECK-O0-NOT: ldp d5, d4
193; CHECK-O0-NOT: ldp d7, d6
194; CHECK-O0-NOT: ldp d17, d16
195; CHECK-O0-NOT: ldp d19, d18
196; CHECK-O0-NOT: ldp d21, d20
197; CHECK-O0-NOT: ldp d23, d22
198; CHECK-O0-NOT: ldp d25, d24
199; CHECK-O0-NOT: ldp d27, d26
200; CHECK-O0-NOT: ldp d29, d28
201; CHECK-O0-NOT: ldp d31, d30
202; CHECK-O0: ret
; %class.C wraps a single i32; @tC is its thread-local instance, used by both
; @tls_test2 and @__tls_test.
203%class.C = type { i32 }
204@tC = internal thread_local global %class.C zeroinitializer, align 4
; External helper that itself uses the cxx_fast_tlscc calling convention.
205declare cxx_fast_tlscc void @tls_helper()
; A cxx_fast_tlscc function that calls a cxx_fast_tlscc callee and then returns
; the address of @tC. The -O0 patterns that precede this block expect no
; stp/ldp register-pair saves or restores around the call to tls_helper.
; Attribute group #1 marks the function nounwind.
206define cxx_fast_tlscc %class.C* @tls_test2() #1 {
207  call cxx_fast_tlscc void @tls_helper()
208  ret %class.C* @tC
209}
210
211; Make sure we do not allow tail call when caller and callee have different
212; calling conventions.
; The label pattern below keeps the leading underscores of @__tls_test so that
; it cannot anchor on the earlier symbol emitted for @tls_test2, which contains
; the shorter text "tls_test" as a substring.
; @_ZN1CD1Ev is only used as the function pointer registered via @_tlv_atexit.
213declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this)
214; CHECK-LABEL: __tls_test
215; CHECK: bl __tlv_atexit
; The caller is cxx_fast_tlscc while @_tlv_atexit is declared with the default
; calling convention, so although the call is marked `tail` and is immediately
; followed by `ret void`, the expected output is a regular call (bl).
216define cxx_fast_tlscc void @__tls_test() {
217entry:
  ; Zero the i32 field of the thread-local @tC.
218  store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4
  ; Register @_ZN1CD1Ev (bitcast to void (i8*)*) for @tC with @__dso_handle.
219  %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1
220  ret void
221}
222
; Attribute groups.
; NOTE(review): #0 is not referenced by any function or call site in the
; visible part of this file; confirm whether it is still needed.
223attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
; #1 is applied to @tls_test2 and to the @_tlv_atexit call in @__tls_test.
224attributes #1 = { nounwind }
225