1; Test the handling of base + displacement addresses for large frames,
2; in cases where both 12-bit and 20-bit displacements are allowed.
3; The tests here assume z10 register pressure, without the high words
4; being available.
5;
6; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
7; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
8; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -frame-pointer=all | \
9; RUN:   FileCheck -check-prefix=CHECK-FP %s
10;
11; This file tests what happens when a displacement is converted from
12; being relative to the start of a frame object to being relative to
13; the frame itself.  In some cases the test is only possible if two
14; objects are allocated.
15;
16; Rather than rely on a particular order for those objects, the tests
17; instead allocate two objects of the same size and apply the test to
18; both of them.  For consistency, all tests follow this model, even if
19; one object would actually be enough.
20
21; First check the highest offset that is in range of the 12-bit form.
22;
23; The last in-range doubleword offset is 4088.  Since the frame has two
24; emergency spill slots at 160(%r15), the amount that we need to allocate
25; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes.
define void @f1() {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: mvi 4095(%r15), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: mvi 4095(%r11), 42
; CHECK-FP: br %r14
;
; Two 3912-byte objects are allocated and byte 7 of each one is written.
; Whichever object is placed at frame offset 4088 yields the store at
; displacement 4088 + 7 = 4095 that the patterns above look for; the
; store to the other object is not checked.
  %slab.a = alloca [3912 x i8], align 8
  %slab.b = alloca [3912 x i8], align 8
  %byte.a = getelementptr inbounds [3912 x i8], [3912 x i8]* %slab.a, i64 0, i64 7
  %byte.b = getelementptr inbounds [3912 x i8], [3912 x i8]* %slab.b, i64 0, i64 7
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
42
43; Test the first offset that is out-of-range of the 12-bit form.
define void @f2() {
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: mviy 4096(%r15), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: mviy 4096(%r11), 42
; CHECK-FP: br %r14
;
; Same 3912-byte objects as f1, but byte 8 is written instead of byte 7.
; For the object at frame offset 4088 that is displacement 4096, and the
; expected instruction is MVIY rather than MVI.
  %slab.a = alloca [3912 x i8], align 8
  %slab.b = alloca [3912 x i8], align 8
  %byte.a = getelementptr inbounds [3912 x i8], [3912 x i8]* %slab.a, i64 0, i64 8
  %byte.b = getelementptr inbounds [3912 x i8], [3912 x i8]* %slab.b, i64 0, i64 8
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
60
61; Test the last offset that is in range of the 20-bit form.
62;
63; The last in-range doubleword offset is 524280, so by the same reasoning
64; as above, we need to allocate objects of 524280 - 176 = 524104 bytes.
define void @f3() {
; CHECK-NOFP-LABEL: f3:
; CHECK-NOFP: mviy 524287(%r15), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f3:
; CHECK-FP: mviy 524287(%r11), 42
; CHECK-FP: br %r14
;
; Two 524104-byte objects, byte 7 of each written.  The object that ends
; up at frame offset 524280 gives displacement 524280 + 7 = 524287, which
; the expected output still encodes directly in MVIY.
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 7
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 7
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
81
82; Test the first out-of-range offset.  We can't use an index register here,
83; and the offset is also out of LAY's range, so expect a constant load
84; followed by an addition.
define void @f4() {
; CHECK-NOFP-LABEL: f4:
; CHECK-NOFP: llilh %r1, 8
; CHECK-NOFP: la %r1, 0(%r1,%r15)
; CHECK-NOFP: mvi 0(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f4:
; CHECK-FP: llilh %r1, 8
; CHECK-FP: la %r1, 0(%r1,%r11)
; CHECK-FP: mvi 0(%r1), 42
; CHECK-FP: br %r14
;
; Same 524104-byte objects as f3, but byte 8 is written, i.e. frame offset
; 524288 = 8 << 16 for the higher object.  The expected sequence loads
; that constant with LLILH, adds the base register with LA, and then
; stores with a zero displacement.
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 8
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 8
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
105
106; Add 4095 to the previous offset, to test the other end of the MVI range.
107; The instruction will actually be STCY before frame lowering.
define void @f5() {
; CHECK-NOFP-LABEL: f5:
; CHECK-NOFP: llilh %r1, 8
; CHECK-NOFP: la %r1, 0(%r1,%r15)
; CHECK-NOFP: mvi 4095(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f5:
; CHECK-FP: llilh %r1, 8
; CHECK-FP: la %r1, 0(%r1,%r11)
; CHECK-FP: mvi 4095(%r1), 42
; CHECK-FP: br %r14
;
; Byte 4103 of a 524104-byte object at frame offset 524280 is frame
; offset 528383 = (8 << 16) + 4095.  The expected output keeps the same
; LLILH/LA anchor as f4 and folds the remaining 4095 into the MVI.
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 4103
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 4103
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
128
129; Test the next offset after that, which uses MVIY instead of MVI.
define void @f6() {
; CHECK-NOFP-LABEL: f6:
; CHECK-NOFP: llilh %r1, 8
; CHECK-NOFP: la %r1, 0(%r1,%r15)
; CHECK-NOFP: mviy 4096(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f6:
; CHECK-FP: llilh %r1, 8
; CHECK-FP: la %r1, 0(%r1,%r11)
; CHECK-FP: mviy 4096(%r1), 42
; CHECK-FP: br %r14
;
; One byte further than f5: byte 4104 is (8 << 16) + 4096 from the frame
; base for the higher object.  The anchor is unchanged, but the expected
; store becomes MVIY with displacement 4096.
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 4104
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 4104
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
150
151; Now try an offset of 524287 from the start of the object, with the
152; object being at offset 1048576 (1 << 20).  The backend prefers to create
153; anchors 0x10000 bytes apart, so that the high part can be loaded using
154; LLILH while still using MVI in more cases than 0x40000 anchors would.
define void @f7() {
; CHECK-NOFP-LABEL: f7:
; CHECK-NOFP: llilh %r1, 23
; CHECK-NOFP: la %r1, 0(%r1,%r15)
; CHECK-NOFP: mviy 65535(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f7:
; CHECK-FP: llilh %r1, 23
; CHECK-FP: la %r1, 0(%r1,%r11)
; CHECK-FP: mviy 65535(%r1), 42
; CHECK-FP: br %r14
;
; Two 1048400-byte objects put the higher one at frame offset
; 176 + 1048400 = 1048576.  Byte 524287 of it is at 1572863, which is
; (23 << 16) + 65535: anchor 23 via LLILH, remainder 65535 on the MVIY.
  %slab.a = alloca [1048400 x i8], align 8
  %slab.b = alloca [1048400 x i8], align 8
  %byte.a = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %slab.a, i64 0, i64 524287
  %byte.b = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %slab.b, i64 0, i64 524287
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
175
176; Keep the object-relative offset the same but bump the size of the
177; objects by one doubleword.
define void @f8() {
; CHECK-NOFP-LABEL: f8:
; CHECK-NOFP: llilh %r1, 24
; CHECK-NOFP: la %r1, 0(%r1,%r15)
; CHECK-NOFP: mvi 7(%r1), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f8:
; CHECK-FP: llilh %r1, 24
; CHECK-FP: la %r1, 0(%r1,%r11)
; CHECK-FP: mvi 7(%r1), 42
; CHECK-FP: br %r14
;
; The objects are 8 bytes larger than in f7 (1048408 bytes), so byte
; 524287 of the higher one moves to 1572871 = (24 << 16) + 7.  The
; expected anchor steps up to 24 and the store is MVI with displacement 7.
  %slab.a = alloca [1048408 x i8], align 8
  %slab.b = alloca [1048408 x i8], align 8
  %byte.a = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %slab.a, i64 0, i64 524287
  %byte.b = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %slab.b, i64 0, i64 524287
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
198
199; Check a case where the original displacement is out of range.  The backend
200; should force separate address logic from the outset.  We don't yet do any
201; kind of anchor optimization, so there should be no offset on the MVI itself.
202;
203; Before frame lowering this is an LA followed by the AGFI seen below.
204; The LA then gets lowered into the LLILH/LA form.  The exact sequence
205; isn't that important though.
define void @f9() {
; CHECK-NOFP-LABEL: f9:
; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
; CHECK-NOFP: agfi [[R2]], 524288
; CHECK-NOFP: mvi 0([[R2]]), 42
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f9:
; CHECK-FP: llilh [[R1:%r[1-5]]], 16
; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
; CHECK-FP: agfi [[R2]], 524288
; CHECK-FP: mvi 0([[R2]]), 42
; CHECK-FP: br %r14
;
; Same 1048408-byte objects as f8, but the byte index itself (524288) is
; one past the largest displacement used directly in f3.  The expected
; output forms the object address (16 << 16) + 8 + base with LLILH/LA,
; adds 524288 with AGFI, and stores with a zero displacement.
  %slab.a = alloca [1048408 x i8], align 8
  %slab.b = alloca [1048408 x i8], align 8
  %byte.a = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %slab.a, i64 0, i64 524288
  %byte.b = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %slab.b, i64 0, i64 524288
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  ret void
}
228
229; Repeat f4 in a case that needs the emergency spill slots (because all
230; call-clobbered registers are live and no call-saved ones have been
231; allocated).
define void @f10(i32 *%vptr) {
; CHECK-NOFP-LABEL: f10:
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: llilh [[REGISTER]], 8
; CHECK-NOFP: la [[REGISTER]], 0([[REGISTER]],%r15)
; CHECK-NOFP: mvi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f10:
; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
; CHECK-FP: llilh [[REGISTER]], 8
; CHECK-FP: la [[REGISTER]], 0([[REGISTER]],%r11)
; CHECK-FP: mvi 0([[REGISTER]]), 42
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
;
; Five values are loaded from %vptr before the byte stores and written
; back afterwards, so all of them are live across the out-of-range
; access.  The patterns above expect the register used for the address to
; be saved to slot 160 or 168 first and reloaded from the same slot
; once the MVI has been issued.
  %held0 = load volatile i32, i32 *%vptr
  %held1 = load volatile i32, i32 *%vptr
  %held2 = load volatile i32, i32 *%vptr
  %held3 = load volatile i32, i32 *%vptr
  %held4 = load volatile i32, i32 *%vptr
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 8
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 8
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  store volatile i32 %held0, i32 *%vptr
  store volatile i32 %held1, i32 *%vptr
  store volatile i32 %held2, i32 *%vptr
  store volatile i32 %held3, i32 *%vptr
  store volatile i32 %held4, i32 *%vptr
  ret void
}
266
267; And again with maximum register pressure.  The only spill slots that the
268; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
269; The FP case needs to spill an extra register and is too dependent on
270; register allocation heuristics for a stable test.
define void @f11(i32 *%vptr) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: stmg %r6, %r15,
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: llilh [[REGISTER]], 8
; CHECK-NOFP: la [[REGISTER]], 0([[REGISTER]],%r15)
; CHECK-NOFP: mvi 0([[REGISTER]]), 42
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
;
; Fourteen values are loaded from %vptr up front and stored back only
; after the two byte stores, so all fourteen are live across the
; out-of-range access.  Only the run without a frame pointer is checked:
; it expects %r6-%r15 to be saved and restored around the same
; spill / LLILH / LA / MVI / reload sequence as in f10.
  %held0 = load volatile i32, i32 *%vptr
  %held1 = load volatile i32, i32 *%vptr
  %held2 = load volatile i32, i32 *%vptr
  %held3 = load volatile i32, i32 *%vptr
  %held4 = load volatile i32, i32 *%vptr
  %held5 = load volatile i32, i32 *%vptr
  %held6 = load volatile i32, i32 *%vptr
  %held7 = load volatile i32, i32 *%vptr
  %held8 = load volatile i32, i32 *%vptr
  %held9 = load volatile i32, i32 *%vptr
  %held10 = load volatile i32, i32 *%vptr
  %held11 = load volatile i32, i32 *%vptr
  %held12 = load volatile i32, i32 *%vptr
  %held13 = load volatile i32, i32 *%vptr
  %slab.a = alloca [524104 x i8], align 8
  %slab.b = alloca [524104 x i8], align 8
  %byte.a = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.a, i64 0, i64 8
  %byte.b = getelementptr inbounds [524104 x i8], [524104 x i8]* %slab.b, i64 0, i64 8
  store volatile i8 42, i8 *%byte.a
  store volatile i8 42, i8 *%byte.b
  store volatile i32 %held0, i32 *%vptr
  store volatile i32 %held1, i32 *%vptr
  store volatile i32 %held2, i32 *%vptr
  store volatile i32 %held3, i32 *%vptr
  store volatile i32 %held4, i32 *%vptr
  store volatile i32 %held5, i32 *%vptr
  store volatile i32 %held6, i32 *%vptr
  store volatile i32 %held7, i32 *%vptr
  store volatile i32 %held8, i32 *%vptr
  store volatile i32 %held9, i32 *%vptr
  store volatile i32 %held10, i32 *%vptr
  store volatile i32 %held11, i32 *%vptr
  store volatile i32 %held12, i32 *%vptr
  store volatile i32 %held13, i32 *%vptr
  ret void
}
317