; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
; RUN:   -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s

; Two adjacent i32 loads should be merged into a single ldp.
; CHECK: ldp_int
; CHECK: ldp
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; Two adjacent sign-extending i32 loads should merge into ldpsw.
; CHECK: ldp_sext_int
; CHECK: ldpsw
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the first result is sign-extended: expect a w-register ldp plus one sxtw.
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw     x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the second result is sign-extended: expect a w-register ldp plus one sxtw.
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw     x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}


; Two adjacent i64 loads should be merged into a single ldp.
; CHECK: ldp_long
; CHECK: ldp
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; Two adjacent float loads should be merged into a single (s-register) ldp.
; CHECK: ldp_float
; CHECK: ldp
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; Two adjacent double loads should be merged into a single (d-register) ldp.
; CHECK: ldp_double
; CHECK: ldp
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
define i32 @ldur_int(i32* %a) nounwind {
; LDUR_CHK: ldur_int
; LDUR_CHK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

; Negative-offset sign-extending loads should fold into an ldpsw.
define i64 @ldur_sext_int(i32* %a) nounwind {
; LDUR_CHK: ldur_sext_int
; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Negative-offset pair where only the first result is sign-extended:
; expect a w-register ldp plus one sxtw.
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; LDUR_CHK: ldur_half_sext_int_res0
; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw     x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Negative-offset pair where only the second result is sign-extended:
; expect a w-register ldp plus one sxtw.
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; LDUR_CHK: ldur_half_sext_int_res1
; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw     x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}


; Negative-offset i64 ldurs should combine into an ldp at [x0, #-16].
define i64 @ldur_long(i64* %a) nounwind ssp {
; LDUR_CHK: ldur_long
; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; Negative-offset float ldurs should combine into an s-register ldp.
define float @ldur_float(float* %a) {
; LDUR_CHK: ldur_float
; LDUR_CHK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add     s{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

; Negative-offset double ldurs should combine into a d-register ldp.
define double @ldur_double(double* %a) {
; LDUR_CHK: ldur_double
; LDUR_CHK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add     d{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; Now check some boundary conditions
; Offset -256 is the most negative reachable by an ldp immediate; pairing must
; still happen here.
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyIn
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; Same boundary (-256) for the sign-extending pair: must form ldpsw, no ldur.
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInSext
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Boundary (-256) with only the first result sign-extended: w-register ldp
; plus one sxtw, and no ldur.
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInHalfSextRes0
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw     x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Boundary (-256) with only the second result sign-extended: w-register ldp
; plus one sxtw, and no ldur.
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInHalfSextRes1
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw     x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; One past the ldp immediate range: pairing must NOT happen.
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyOut
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; One past the ldpsw immediate range: pairing must NOT happen.
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyOutSext
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Misaligned (offset not a multiple of 8): the loads must stay as two ldurs.
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpNotAligned
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldur
; LDUR_CHK-NEXT: ldur
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; Misaligned sign-extending loads: must stay as two ldursw, no ldp.
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpNotAlignedSext
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldursw
; LDUR_CHK-NEXT: ldursw
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}
