; Test the handling of base + 12-bit displacement addresses for large frames,
; in cases where no 20-bit form exists.  The tests here assume z10 register
; pressure, without the high words being available.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
; RUN:   FileCheck -check-prefix=CHECK-FP %s

; This file tests what happens when a displacement is converted from
; being relative to the start of a frame object to being relative to
; the frame itself.  In some cases the test is only possible if two
; objects are allocated.
;
; Rather than rely on a particular order for those objects, the tests
; instead allocate two objects of the same size and apply the test to
; both of them.  For consistency, all tests follow this model, even if
; one object would actually be enough.

; First check the highest in-range offset after conversion, which is 4092
; for word-addressing instructions like MVHI.
;
; The last in-range doubleword offset is 4088.  Since the frame has two
; emergency spill slots at 160(%r15), the amount that we need to allocate
; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978
; words.
define void @f1() {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: mvhi 4092(%r15), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: mvhi 4092(%r11), 42
; CHECK-FP: br %r14
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  ; Word 1 lies 4 bytes into each object, so the object placed at frame
  ; offset 4088 is accessed at 4088 + 4 = 4092.
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 1
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 1
  ; Volatile stores so that neither access can be removed.
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Test the first out-of-range offset.  We cannot use an index register here.
define void @f2() {
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 0(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 0(%r1), 42
; CHECK-FP: br %r14
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  ; Word 2 lies 8 bytes into each object: 4088 + 8 = 4096, one word past
  ; the last offset (4092) that fits in a 12-bit displacement.
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Test the next offset after that.
define void @f3() {
; CHECK-NOFP-LABEL: f3:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f3:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  ; Word 3 lies 12 bytes into each object: 4088 + 12 = 4100 = 4096 + 4.
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 3
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 3
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Add 4096 bytes (1024 words) to the size of each object and repeat.
define void @f4() {
; CHECK-NOFP-LABEL: f4:
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: mvhi 4092(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f4:
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: mvhi 4092(%r1), 42
; CHECK-FP: br %r14
  ; 978 + 1024 = 2002 words, which moves the second object from frame
  ; offset 4088 to 4088 + 4096 = 8184.
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  ; Word 1: 8184 + 4 = 8188 = 4096 + 4092.
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 1
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 1
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; As for f2, but with the larger objects of f4: the access is now the first
; word beyond the 12-bit range of a base set to 4096 past the frame register.
define void @f5() {
; CHECK-NOFP-LABEL: f5:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 0(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f5:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 0(%r1), 42
; CHECK-FP: br %r14
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  ; Word 2: 8184 + 8 = 8192.
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; As for f3, but with the larger objects of f4: the next word after the
; one tested in f5.
define void @f6() {
; CHECK-NOFP-LABEL: f6:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f6:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
  %region1 = alloca [2002 x i32], align 8
  %region2 = alloca [2002 x i32], align 8
  ; Word 3: 8184 + 12 = 8196 = 8192 + 4.
  %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 3
  %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 3
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Now try an offset of 4092 from the start of the object, with the object
; being at offset 8192.  This time we need objects of (8192 - 176) / 4 = 2004
; words.
define void @f7() {
; CHECK-NOFP-LABEL: f7:
; CHECK-NOFP: lay %r1, 8192(%r15)
; CHECK-NOFP: mvhi 4092(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f7:
; CHECK-FP: lay %r1, 8192(%r11)
; CHECK-FP: mvhi 4092(%r1), 42
; CHECK-FP: br %r14
  %region1 = alloca [2004 x i32], align 8
  %region2 = alloca [2004 x i32], align 8
  ; Word 1023 lies 1023 * 4 = 4092 bytes into each object.
  %ptr1 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region1, i64 0, i64 1023
  %ptr2 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region2, i64 0, i64 1023
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Keep the object-relative offset the same but bump the size of the
; objects by one doubleword.
define void @f8() {
; CHECK-NOFP-LABEL: f8:
; CHECK-NOFP: lay %r1, 12288(%r15)
; CHECK-NOFP: mvhi 4(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f8:
; CHECK-FP: lay %r1, 12288(%r11)
; CHECK-FP: mvhi 4(%r1), 42
; CHECK-FP: br %r14
  ; 2004 + 2 = 2006 words, which moves the second object from frame
  ; offset 8192 to 8200.
  %region1 = alloca [2006 x i32], align 8
  %region2 = alloca [2006 x i32], align 8
  ; Word 1023: 8200 + 4092 = 12292 = 12288 + 4.
  %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1023
  %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1023
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Check a case where the original displacement is out of range.  The backend
; should force STY to be used instead.
define void @f9() {
; CHECK-NOFP-LABEL: f9:
; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42
; CHECK-NOFP: sty [[TMP]], 12296(%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f9:
; CHECK-FP: lhi [[TMP:%r[0-5]]], 42
; CHECK-FP: sty [[TMP]], 12296(%r11)
; CHECK-FP: br %r14
  %region1 = alloca [2006 x i32], align 8
  %region2 = alloca [2006 x i32], align 8
  ; Word 1024 lies 4096 bytes into each object, which is already beyond the
  ; 12-bit range before the object's frame offset is added:
  ; 8200 + 4096 = 12296.
  %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1024
  %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1024
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ret void
}

; Repeat f2 in a case that needs the emergency spill slots (because all
; call-clobbered registers are live and no call-saved ones have been
; allocated).
define void @f10(i32 *%vptr) {
; CHECK-NOFP-LABEL: f10:
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f10:
; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
; CHECK-FP: lay [[REGISTER]], 4096(%r11)
; CHECK-FP: mvhi 0([[REGISTER]]), 42
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
  ; Five values are loaded here and stored back only after the frame
  ; accesses, so they and %vptr are all live across those accesses.
  %i0 = load volatile i32 , i32 *%vptr
  %i1 = load volatile i32 , i32 *%vptr
  %i3 = load volatile i32 , i32 *%vptr
  %i4 = load volatile i32 , i32 *%vptr
  %i5 = load volatile i32 , i32 *%vptr
  ; Same objects and element index as f2: the access is at frame offset 4096.
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ; Use the loaded values so that they stay live up to this point.
  store volatile i32 %i0, i32 *%vptr
  store volatile i32 %i1, i32 *%vptr
  store volatile i32 %i3, i32 *%vptr
  store volatile i32 %i4, i32 *%vptr
  store volatile i32 %i5, i32 *%vptr
  ret void
}

; And again with maximum register pressure.  The only spill slots that the
; NOFP case needs are the emergency ones, so the offsets are the same as for f2.
; The FP case needs to spill an extra register and is too dependent on
; register allocation heuristics for a stable test.
define void @f11(i32 *%vptr) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: stmg %r6, %r15,
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
  ; Fourteen values are loaded here and stored back only after the frame
  ; accesses, so they and %vptr are all live across those accesses.
  %i0 = load volatile i32 , i32 *%vptr
  %i1 = load volatile i32 , i32 *%vptr
  %i3 = load volatile i32 , i32 *%vptr
  %i4 = load volatile i32 , i32 *%vptr
  %i5 = load volatile i32 , i32 *%vptr
  %i6 = load volatile i32 , i32 *%vptr
  %i7 = load volatile i32 , i32 *%vptr
  %i8 = load volatile i32 , i32 *%vptr
  %i9 = load volatile i32 , i32 *%vptr
  %i10 = load volatile i32 , i32 *%vptr
  %i11 = load volatile i32 , i32 *%vptr
  %i12 = load volatile i32 , i32 *%vptr
  %i13 = load volatile i32 , i32 *%vptr
  %i14 = load volatile i32 , i32 *%vptr
  ; Same objects and element index as f2: the access is at frame offset 4096.
  %region1 = alloca [978 x i32], align 8
  %region2 = alloca [978 x i32], align 8
  %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
  %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
  store volatile i32 42, i32 *%ptr1
  store volatile i32 42, i32 *%ptr2
  ; Use the loaded values so that they stay live up to this point.
  store volatile i32 %i0, i32 *%vptr
  store volatile i32 %i1, i32 *%vptr
  store volatile i32 %i3, i32 *%vptr
  store volatile i32 %i4, i32 *%vptr
  store volatile i32 %i5, i32 *%vptr
  store volatile i32 %i6, i32 *%vptr
  store volatile i32 %i7, i32 *%vptr
  store volatile i32 %i8, i32 *%vptr
  store volatile i32 %i9, i32 *%vptr
  store volatile i32 %i10, i32 *%vptr
  store volatile i32 %i11, i32 *%vptr
  store volatile i32 %i12, i32 *%vptr
  store volatile i32 %i13, i32 *%vptr
  store volatile i32 %i14, i32 *%vptr
  ret void
}
