1; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
2
; ldp_int: two adjacent, 4-byte-aligned i32 loads (p[0] and p[1]) are expected
; to be merged into one W-register load-pair based at x0.
; The operands are spelled out because a bare "ldp" pattern is also a
; substring of the function name, so it would match symbol directives such as
; .globl/.size even when no load-pair instruction was emitted.
3; CHECK-LABEL: ldp_int:
4; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0]
5define i32 @ldp_int(i32* %p) nounwind {
6  %tmp = load i32, i32* %p, align 4
7  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
8  %tmp1 = load i32, i32* %add.ptr, align 4
9  %add = add nsw i32 %tmp1, %tmp
10  ret i32 %add
11}
12
; ldp_sext_int: two adjacent i32 loads (p[0] and p[1]) that are both
; sign-extended to i64 are expected to become a single sign-extending
; load-pair into X registers, based at x0.
; The label is anchored with a trailing colon because this function's name is
; a prefix of other function names in this file.
13; CHECK-LABEL: ldp_sext_int:
14; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0]
15define i64 @ldp_sext_int(i32* %p) nounwind {
16  %tmp = load i32, i32* %p, align 4
17  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
18  %tmp1 = load i32, i32* %add.ptr, align 4
19  %sexttmp = sext i32 %tmp to i64
20  %sexttmp1 = sext i32 %tmp1 to i64
21  %add = add nsw i64 %sexttmp1, %sexttmp
22  ret i64 %add
23}
24
; Adjacent i32 loads where only the first value (p[0]) is sign-extended; the
; second (p[1]) is zero-extended. Expected output: a plain W-register
; load-pair from [x0], followed by an sxtw of the first loaded register.
25; CHECK-LABEL: ldp_half_sext_res0_int:
26; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
27; CHECK: sxtw     x[[DST1]], w[[DST1]]
28define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
29  %tmp = load i32, i32* %p, align 4
30  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
31  %tmp1 = load i32, i32* %add.ptr, align 4
32  %sexttmp = sext i32 %tmp to i64
33  %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extended despite the "sext" in the name
34  %add = add nsw i64 %sexttmp1, %sexttmp
35  ret i64 %add
36}
37
; Adjacent i32 loads where only the second value (p[1]) is sign-extended; the
; first (p[0]) is zero-extended. Expected output: a plain W-register
; load-pair from [x0], followed by an sxtw of the second loaded register.
38; CHECK-LABEL: ldp_half_sext_res1_int:
39; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
40; CHECK: sxtw     x[[DST2]], w[[DST2]]
41define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
42  %tmp = load i32, i32* %p, align 4
43  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
44  %tmp1 = load i32, i32* %add.ptr, align 4
45  %sexttmp = zext i32 %tmp to i64 ; zero-extended despite the "sext" in the name
46  %sexttmp1 = sext i32 %tmp1 to i64
47  %add = add nsw i64 %sexttmp1, %sexttmp
48  ret i64 %add
49}
50
51
; ldp_long: two adjacent, 8-byte-aligned i64 loads (p[0] and p[1]) are expected
; to be merged into one X-register load-pair based at x0.
; The operands are spelled out because a bare "ldp" pattern is also a
; substring of the function name and would match symbol directives.
52; CHECK-LABEL: ldp_long:
53; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x0]
54define i64 @ldp_long(i64* %p) nounwind {
55  %tmp = load i64, i64* %p, align 8
56  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
57  %tmp1 = load i64, i64* %add.ptr, align 8
58  %add = add nsw i64 %tmp1, %tmp
59  ret i64 %add
60}
61
; ldp_float: two adjacent, 4-byte-aligned float loads (p[0] and p[1]) are
; expected to be merged into one S-register load-pair based at x0.
; The operands are spelled out because a bare "ldp" pattern is also a
; substring of the function name and would match symbol directives.
62; CHECK-LABEL: ldp_float:
63; CHECK: ldp s{{[0-9]+}}, s{{[0-9]+}}, [x0]
64define float @ldp_float(float* %p) nounwind {
65  %tmp = load float, float* %p, align 4
66  %add.ptr = getelementptr inbounds float, float* %p, i64 1
67  %tmp1 = load float, float* %add.ptr, align 4
68  %add = fadd float %tmp, %tmp1
69  ret float %add
70}
71
; ldp_double: two adjacent, 8-byte-aligned double loads (p[0] and p[1]) are
; expected to be merged into one D-register load-pair based at x0.
; The operands are spelled out because a bare "ldp" pattern is also a
; substring of the function name and would match symbol directives.
72; CHECK-LABEL: ldp_double:
73; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}, [x0]
74define double @ldp_double(double* %p) nounwind {
75  %tmp = load double, double* %p, align 8
76  %add.ptr = getelementptr inbounds double, double* %p, i64 1
77  %tmp1 = load double, double* %add.ptr, align 8
78  %add = fadd double %tmp, %tmp1
79  ret double %add
80}
81
82; Test the load/store optimizer---combine ldurs into an ldp, if appropriate
; Loads a[-1] and a[-2] (byte offsets -4 and -8 from %a), each marked align 2,
; and adds them. Expected output: one W-register load-pair at [x0, #-8], then
; an add that names the second loaded register before the first, then ret.
83define i32 @ldur_int(i32* %a) nounwind {
84; CHECK-LABEL: ldur_int
85; CHECK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
86; CHECK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
87; CHECK-NEXT: ret
88  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
89  %tmp1 = load i32, i32* %p1, align 2
90  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
91  %tmp2 = load i32, i32* %p2, align 2
92  %tmp3 = add i32 %tmp1, %tmp2
93  ret i32 %tmp3
94}
95
; Loads a[-1] and a[-2] (byte offsets -4 and -8 from %a), sign-extends both to
; i64 and adds them. Expected output: one sign-extending load-pair into X
; registers at [x0, #-8], immediately followed by the add and ret.
96define i64 @ldur_sext_int(i32* %a) nounwind {
97; CHECK-LABEL: ldur_sext_int
98; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
99; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
100; CHECK-NEXT: ret
101  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
102  %tmp1 = load i32, i32* %p1, align 2
103  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
104  %tmp2 = load i32, i32* %p2, align 2
105  %sexttmp1 = sext i32 %tmp1 to i64
106  %sexttmp2 = sext i32 %tmp2 to i64
107  %tmp3 = add i64 %sexttmp1, %sexttmp2
108  ret i64 %tmp3
109}
110
; Loads a[-1] and a[-2]; only the value at the lower address (a[-2]) is
; sign-extended, the other is zero-extended. Expected output: a plain
; W-register load-pair at [x0, #-8], an sxtw of the first loaded register,
; then the add and ret.
111define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
112; CHECK-LABEL: ldur_half_sext_int_res0
113; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
114; CHECK: sxtw     x[[DST1]], w[[DST1]]
115; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
116; CHECK-NEXT: ret
117  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
118  %tmp1 = load i32, i32* %p1, align 2
119  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
120  %tmp2 = load i32, i32* %p2, align 2
121  %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extended despite the "sext" in the name
122  %sexttmp2 = sext i32 %tmp2 to i64
123  %tmp3 = add i64 %sexttmp1, %sexttmp2
124  ret i64 %tmp3
125}
126
; Loads a[-1] and a[-2]; only the value at the higher address (a[-1]) is
; sign-extended, the other is zero-extended. Expected output: a plain
; W-register load-pair at [x0, #-8], an sxtw of the second loaded register,
; then the add and ret.
127define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
128; CHECK-LABEL: ldur_half_sext_int_res1
129; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
130; CHECK: sxtw     x[[DST2]], w[[DST2]]
131; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
132; CHECK-NEXT: ret
133  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
134  %tmp1 = load i32, i32* %p1, align 2
135  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
136  %tmp2 = load i32, i32* %p2, align 2
137  %sexttmp1 = sext i32 %tmp1 to i64
138  %sexttmp2 = zext i32 %tmp2 to i64 ; zero-extended despite the "sext" in the name
139  %tmp3 = add i64 %sexttmp1, %sexttmp2
140  ret i64 %tmp3
141}
142
143
; Loads a[-1] and a[-2] as i64 (byte offsets -8 and -16 from %a), each marked
; align 2, and adds them. Expected output: one X-register load-pair at
; [x0, #-16], immediately followed by the add and ret.
144define i64 @ldur_long(i64* %a) nounwind ssp {
145; CHECK-LABEL: ldur_long
146; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
147; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
148; CHECK-NEXT: ret
149  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
150  %tmp1 = load i64, i64* %p1, align 2
151  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
152  %tmp2 = load i64, i64* %p2, align 2
153  %tmp3 = add i64 %tmp1, %tmp2
154  ret i64 %tmp3
155}
156
; Loads a[-1] and a[-2] as float (byte offsets -4 and -8 from %a), each marked
; align 2, and adds them with fadd. Expected output: one S-register load-pair
; at [x0, #-8], immediately followed by the floating-point add and ret.
; The mnemonic is written as "fadd" so the directive names the instruction the
; IR actually requires instead of relying on a substring match.
157define float @ldur_float(float* %a) {
158; CHECK-LABEL: ldur_float
159; CHECK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
160; CHECK-NEXT: fadd    s{{[0-9]+}}, [[DST2]], [[DST1]]
161; CHECK-NEXT: ret
162  %p1 = getelementptr inbounds float, float* %a, i64 -1
163  %tmp1 = load float, float* %p1, align 2
164  %p2 = getelementptr inbounds float, float* %a, i64 -2
165  %tmp2 = load float, float* %p2, align 2
166  %tmp3 = fadd float %tmp1, %tmp2
167  ret float %tmp3
168}
169
; Loads a[-1] and a[-2] as double (byte offsets -8 and -16 from %a), each
; marked align 2, and adds them with fadd. Expected output: one D-register
; load-pair at [x0, #-16], immediately followed by the floating-point add and
; ret.
; The mnemonic is written as "fadd" because a plain "add" on D registers is a
; distinct (integer) instruction that the looser pattern would also accept.
170define double @ldur_double(double* %a) {
171; CHECK-LABEL: ldur_double
172; CHECK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
173; CHECK-NEXT: fadd    d{{[0-9]+}}, [[DST2]], [[DST1]]
174; CHECK-NEXT: ret
175  %p1 = getelementptr inbounds double, double* %a, i64 -1
176  %tmp1 = load double, double* %p1, align 2
177  %p2 = getelementptr inbounds double, double* %a, i64 -2
178  %tmp2 = load double, double* %p2, align 2
179  %tmp3 = fadd double %tmp1, %tmp2
180  ret double %tmp3
181}
182
183; Now check some boundary conditions
; Loads a[-31] and a[-32] as i64 (byte offsets -248 and -256 from %a) and adds
; them. Expected output: no ldur before a single X-register load-pair at
; [x0, #-256], then the add and ret.
; NOTE(review): -256 is presumably the lowest offset at which pairing is still
; performed; confirm against the AArch64 load/store optimizer.
184define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
185; CHECK-LABEL: pairUpBarelyIn
186; CHECK-NOT: ldur
187; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
188; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
189; CHECK-NEXT: ret
190  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
191  %tmp1 = load i64, i64* %p1, align 2
192  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
193  %tmp2 = load i64, i64* %p2, align 2
194  %tmp3 = add i64 %tmp1, %tmp2
195  ret i64 %tmp3
196}
197
; Loads a[-63] and a[-64] as i32 (byte offsets -252 and -256 from %a),
; sign-extends both to i64 and adds them. Expected output: no ldur before a
; single sign-extending load-pair at [x0, #-256], then the add and ret.
198define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
199; CHECK-LABEL: pairUpBarelyInSext
200; CHECK-NOT: ldur
201; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
202; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
203; CHECK-NEXT: ret
204  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
205  %tmp1 = load i32, i32* %p1, align 2
206  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
207  %tmp2 = load i32, i32* %p2, align 2
208  %sexttmp1 = sext i32 %tmp1 to i64
209  %sexttmp2 = sext i32 %tmp2 to i64
210  %tmp3 = add i64 %sexttmp1, %sexttmp2
211  ret i64 %tmp3
212}
213
; Loads a[-63] and a[-64] as i32 (byte offsets -252 and -256 from %a); only
; the value at the lower address (a[-64]) is sign-extended. Expected output:
; no ldur before a plain W-register load-pair at [x0, #-256], an sxtw of the
; first loaded register, then the add and ret.
214define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
215; CHECK-LABEL: pairUpBarelyInHalfSextRes0
216; CHECK-NOT: ldur
217; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
218; CHECK: sxtw     x[[DST1]], w[[DST1]]
219; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
220; CHECK-NEXT: ret
221  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
222  %tmp1 = load i32, i32* %p1, align 2
223  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
224  %tmp2 = load i32, i32* %p2, align 2
225  %sexttmp1 = zext i32 %tmp1 to i64 ; zero-extended despite the "sext" in the name
226  %sexttmp2 = sext i32 %tmp2 to i64
227  %tmp3 = add i64 %sexttmp1, %sexttmp2
228  ret i64 %tmp3
229}
230
; Loads a[-63] and a[-64] as i32 (byte offsets -252 and -256 from %a); only
; the value at the higher address (a[-63]) is sign-extended. Expected output:
; no ldur before a plain W-register load-pair at [x0, #-256], an sxtw of the
; second loaded register, then the add and ret.
231define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
232; CHECK-LABEL: pairUpBarelyInHalfSextRes1
233; CHECK-NOT: ldur
234; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
235; CHECK: sxtw     x[[DST2]], w[[DST2]]
236; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
237; CHECK-NEXT: ret
238  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
239  %tmp1 = load i32, i32* %p1, align 2
240  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
241  %tmp2 = load i32, i32* %p2, align 2
242  %sexttmp1 = sext i32 %tmp1 to i64
243  %sexttmp2 = zext i32 %tmp2 to i64 ; zero-extended despite the "sext" in the name
244  %tmp3 = add i64 %sexttmp1, %sexttmp2
245  ret i64 %tmp3
246}
247
; Loads a[-32] and a[-33] as i64 (byte offsets -256 and -264 from %a) and adds
; them. Expected output: no load-pair anywhere before the add, and ret
; directly after the add.
; NOTE(review): -264 is presumably just past the offset range in which the
; loads get paired; confirm against the AArch64 load/store optimizer.
248define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
249; CHECK-LABEL: pairUpBarelyOut
250; CHECK-NOT: ldp
251; Don't be fragile about which loads or manipulations of the base register
252; are used---just check that there isn't an ldp before the add
253; CHECK: add
254; CHECK-NEXT: ret
255  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
256  %tmp1 = load i64, i64* %p1, align 2
257  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
258  %tmp2 = load i64, i64* %p2, align 2
259  %tmp3 = add i64 %tmp1, %tmp2
260  ret i64 %tmp3
261}
262
; Loads a[-64] and a[-65] as i32 (byte offsets -256 and -260 from %a),
; sign-extends both to i64 and adds them. Expected output: no load-pair
; anywhere before the add, and ret directly after the add.
263define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
264; CHECK-LABEL: pairUpBarelyOutSext
265; CHECK-NOT: ldp
266; Don't be fragile about which loads or manipulations of the base register
267; are used---just check that there isn't an ldp before the add
268; CHECK: add
269; CHECK-NEXT: ret
270  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
271  %tmp1 = load i32, i32* %p1, align 2
272  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
273  %tmp2 = load i32, i32* %p2, align 2
274  %sexttmp1 = sext i32 %tmp1 to i64
275  %sexttmp2 = sext i32 %tmp2 to i64
276  %tmp3 = add i64 %sexttmp1, %sexttmp2
277  ret i64 %tmp3
278}
279
; Loads two i64 values that are 8 bytes apart but start one byte past an
; element boundary: byte offsets -143 (a[-18] + 1) and -135 (a[-17] + 1).
; Expected output: no load-pair; two consecutive ldur instructions followed
; directly by the add and ret.
280define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
281; CHECK-LABEL: pairUpNotAligned
282; CHECK-NOT: ldp
283; CHECK: ldur
284; CHECK-NEXT: ldur
285; CHECK-NEXT: add
286; CHECK-NEXT: ret
287  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
288  %bp1 = bitcast i64* %p1 to i8*
289  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 ; step one byte forward
290  %dp1 = bitcast i8* %bp1p1 to i64*
291  %tmp1 = load i64, i64* %dp1, align 1
292
293  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
294  %bp2 = bitcast i64* %p2 to i8*
295  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1 ; step one byte forward
296  %dp2 = bitcast i8* %bp2p1 to i64*
297  %tmp2 = load i64, i64* %dp2, align 1
298
299  %tmp3 = add i64 %tmp1, %tmp2
300  ret i64 %tmp3
301}
302
; Loads two i32 values that are 4 bytes apart but start one byte past an
; element boundary: byte offsets -71 (a[-18] + 1) and -67 (a[-17] + 1). Both
; are sign-extended to i64 and added.
; Expected output: no load-pair; two consecutive ldursw instructions followed
; directly by the add and ret.
303define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
304; CHECK-LABEL: pairUpNotAlignedSext
305; CHECK-NOT: ldp
306; CHECK: ldursw
307; CHECK-NEXT: ldursw
308; CHECK-NEXT: add
309; CHECK-NEXT: ret
310  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
311  %bp1 = bitcast i32* %p1 to i8*
312  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 ; step one byte forward
313  %dp1 = bitcast i8* %bp1p1 to i32*
314  %tmp1 = load i32, i32* %dp1, align 1
315
316  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
317  %bp2 = bitcast i32* %p2 to i8*
318  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1 ; step one byte forward
319  %dp2 = bitcast i8* %bp2p1 to i32*
320  %tmp2 = load i32, i32* %dp2, align 1
321
322  %sexttmp1 = sext i32 %tmp1 to i64
323  %sexttmp2 = sext i32 %tmp2 to i64
324  %tmp3 = add i64 %sexttmp1, %sexttmp2
325  ret i64 %tmp3
326}
327
; External function taking an i32 pointer; it is only declared here, not
; defined, and is called by ldp_sext_int_pre and ldp_sext_int_post.
328declare void @use-ptr(i32*)
329
; Computes p+2 (byte offset 8), passes that pointer to @use-ptr, then loads
; the two i32 values starting at it, sign-extends both and adds them.
; Expected output: a sign-extending load-pair that addresses an X register
; with a #8 immediate offset.
330; CHECK-LABEL: ldp_sext_int_pre
331; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
332define i64 @ldp_sext_int_pre(i32* %p) nounwind {
333  %ptr = getelementptr inbounds i32, i32* %p, i64 2
334  call void @use-ptr(i32* %ptr)
335  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
336  %tmp = load i32, i32* %add.ptr, align 4
337  %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
338  %tmp1 = load i32, i32* %add.ptr1, align 4
339  %sexttmp = sext i32 %tmp to i64
340  %sexttmp1 = sext i32 %tmp1 to i64
341  %add = add nsw i64 %sexttmp1, %sexttmp
342  ret i64 %add
343}
344
; Loads p[0] and p[1], sign-extends both, then passes p+2 (byte offset 8) to
; @use-ptr before adding the two values.
; Expected output: a sign-extending load-pair in post-indexed form on x0 with
; an increment of 8.
345; CHECK-LABEL: ldp_sext_int_post
346; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
347define i64 @ldp_sext_int_post(i32* %p) nounwind {
348  %tmp = load i32, i32* %p, align 4
349  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
350  %tmp1 = load i32, i32* %add.ptr, align 4
351  %sexttmp = sext i32 %tmp to i64
352  %sexttmp1 = sext i32 %tmp1 to i64
353  %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
354  call void @use-ptr(i32* %ptr)
355  %add = add nsw i64 %sexttmp1, %sexttmp
356  ret i64 %add
357}
358
359