1; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s
3
4; This file contains tests for the AArch64 load/store optimizer.
5
; Aggregate types for the "[x0, #32]!" pre-index tests below.
; %padding is four pointers; every %struct.* puts it in front of an %s.*
; payload, so the payload is always reached through field index 1.
6%padding = type { i8*, i8*, i8*, i8* }
7%s.byte = type { i8, i8 }
8%s.halfword = type { i16, i16 }
9%s.word = type { i32, i32 }
10%s.doubleword = type { i64, i32 }
11%s.quadword = type { fp128, i32 }
12%s.float = type { float, i32 }
13%s.double = type { double, i32 }
14%struct.byte = type { %padding, %s.byte }
15%struct.halfword = type { %padding, %s.halfword }
16%struct.word = type { %padding, %s.word }
17%struct.doubleword = type { %padding, %s.doubleword }
18%struct.quadword = type { %padding, %s.quadword }
19%struct.float = type { %padding, %s.float }
20%struct.double = type { %padding, %s.double }
21
22; Check the following transform:
23;
24; (ldr|str) X, [x0, #32]
25;  ...
26; add x0, x0, #32
27;  ->
28; (ldr|str) X, [x0, #32]!
29;
30; with X being either w1, x1, s0, d0 or q0.
31
; Callee declared without a body; takes an %s.byte address and an i8 value.
32declare void @bar_byte(%s.byte*, i8)
33
; Loads the i8 at field (1, 0) of %struct.byte, then, in block %bar, passes
; the address of field 1 and the loaded value to @bar_byte.
; Expected output: an ldrb using the "[xN, #32]!" pre-index form.
34define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind {
35; CHECK-LABEL: load-pre-indexed-byte
36; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
37entry:
38  %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
39  %add = load i8, i8* %a, align 4
40  br label %bar
41bar:
42  %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
43  tail call void @bar_byte(%s.byte* %c, i8 %add)
44  ret void
45}
46
; Stores %val to field (1, 0) of %struct.byte, then, in block %bar, passes
; the address of field 1 and %val to @bar_byte.
; Expected output: an strb using the "[xN, #32]!" pre-index form.
47define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind {
48; CHECK-LABEL: store-pre-indexed-byte
49; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
50entry:
51  %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
52  store i8 %val, i8* %a, align 4
53  br label %bar
54bar:
55  %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
56  tail call void @bar_byte(%s.byte* %c, i8 %val)
57  ret void
58}
59
; Callee declared without a body; takes an %s.halfword address and an i16.
60declare void @bar_halfword(%s.halfword*, i16)
61
; Loads the i16 at field (1, 0) of %struct.halfword, then, in block %bar,
; passes the address of field 1 and the loaded value to @bar_halfword.
; Expected output: an ldrh using the "[xN, #32]!" pre-index form.
62define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind {
63; CHECK-LABEL: load-pre-indexed-halfword
64; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
65entry:
66  %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
67  %add = load i16, i16* %a, align 4
68  br label %bar
69bar:
70  %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
71  tail call void @bar_halfword(%s.halfword* %c, i16 %add)
72  ret void
73}
74
; Stores %val to field (1, 0) of %struct.halfword, then, in block %bar,
; passes the address of field 1 and %val to @bar_halfword.
; Expected output: an strh using the "[xN, #32]!" pre-index form.
75define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind {
76; CHECK-LABEL: store-pre-indexed-halfword
77; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
78entry:
79  %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
80  store i16 %val, i16* %a, align 4
81  br label %bar
82bar:
83  %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
84  tail call void @bar_halfword(%s.halfword* %c, i16 %val)
85  ret void
86}
87
; Callee declared without a body; takes an %s.word address and an i32 value.
88declare void @bar_word(%s.word*, i32)
89
; Loads the i32 at field (1, 0) of %struct.word, then, in block %bar, passes
; the address of field 1 and the loaded value to @bar_word.
; Expected output: a w-register ldr using the "[xN, #32]!" pre-index form.
90define void @load-pre-indexed-word(%struct.word* %ptr) nounwind {
91; CHECK-LABEL: load-pre-indexed-word
92; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
93entry:
94  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
95  %add = load i32, i32* %a, align 4
96  br label %bar
97bar:
98  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
99  tail call void @bar_word(%s.word* %c, i32 %add)
100  ret void
101}
102
; Stores %val to field (1, 0) of %struct.word, then, in block %bar, passes
; the address of field 1 and %val to @bar_word.
; Expected output: a w-register str using the "[xN, #32]!" pre-index form.
103define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
104; CHECK-LABEL: store-pre-indexed-word
105; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
106entry:
107  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
108  store i32 %val, i32* %a, align 4
109  br label %bar
110bar:
111  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
112  tail call void @bar_word(%s.word* %c, i32 %val)
113  ret void
114}
115
; Callee declared without a body; takes an %s.doubleword address and an i64.
116declare void @bar_doubleword(%s.doubleword*, i64)
117
; Loads the i64 at field (1, 0) of %struct.doubleword, then, in block %bar,
; passes the address of field 1 and the loaded value to @bar_doubleword.
; Expected output: an x-register ldr using the "[xN, #32]!" pre-index form.
118define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind {
119; CHECK-LABEL: load-pre-indexed-doubleword
120; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
121entry:
122  %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
123  %add = load i64, i64* %a, align 8
124  br label %bar
125bar:
126  %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
127  tail call void @bar_doubleword(%s.doubleword* %c, i64 %add)
128  ret void
129}
130
; Stores %val to field (1, 0) of %struct.doubleword, then, in block %bar,
; passes the address of field 1 and %val to @bar_doubleword.
; Expected output: an x-register str using the "[xN, #32]!" pre-index form.
131define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind {
132; CHECK-LABEL: store-pre-indexed-doubleword
133; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
134entry:
135  %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
136  store i64 %val, i64* %a, align 8
137  br label %bar
138bar:
139  %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
140  tail call void @bar_doubleword(%s.doubleword* %c, i64 %val)
141  ret void
142}
143
; Callee declared without a body; takes an %s.quadword address and an fp128.
144declare void @bar_quadword(%s.quadword*, fp128)
145
; Loads the fp128 at field (1, 0) of %struct.quadword, then, in block %bar,
; passes the address of field 1 and the loaded value to @bar_quadword.
; Expected output: a q-register ldr using the "[xN, #32]!" pre-index form.
146define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind {
147; CHECK-LABEL: load-pre-indexed-quadword
148; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
149entry:
150  %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
151  %add = load fp128, fp128* %a, align 16
152  br label %bar
153bar:
154  %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
155  tail call void @bar_quadword(%s.quadword* %c, fp128 %add)
156  ret void
157}
158
; Stores %val to field (1, 0) of %struct.quadword, then, in block %bar,
; passes the address of field 1 and %val to @bar_quadword.
; Expected output: a q-register str using the "[xN, #32]!" pre-index form.
159define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind {
160; CHECK-LABEL: store-pre-indexed-quadword
161; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
162entry:
163  %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
164  store fp128 %val, fp128* %a, align 16
165  br label %bar
166bar:
167  %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
168  tail call void @bar_quadword(%s.quadword* %c, fp128 %val)
169  ret void
170}
171
; Callee declared without a body; takes an %s.float address and a float.
172declare void @bar_float(%s.float*, float)
173
; Loads the float at field (1, 0) of %struct.float, then, in block %bar,
; passes the address of field 1 and the loaded value to @bar_float.
; Expected output: an s-register ldr using the "[xN, #32]!" pre-index form.
174define void @load-pre-indexed-float(%struct.float* %ptr) nounwind {
175; CHECK-LABEL: load-pre-indexed-float
176; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
177entry:
178  %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
179  %add = load float, float* %a, align 4
180  br label %bar
181bar:
182  %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
183  tail call void @bar_float(%s.float* %c, float %add)
184  ret void
185}
186
; Stores %val to field (1, 0) of %struct.float, then, in block %bar, passes
; the address of field 1 and %val to @bar_float.
; Expected output: an s-register str using the "[xN, #32]!" pre-index form.
187define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind {
188; CHECK-LABEL: store-pre-indexed-float
189; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
190entry:
191  %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
192  store float %val, float* %a, align 4
193  br label %bar
194bar:
195  %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
196  tail call void @bar_float(%s.float* %c, float %val)
197  ret void
198}
199
; Callee declared without a body; takes an %s.double address and a double.
200declare void @bar_double(%s.double*, double)
201
; Loads the double at field (1, 0) of %struct.double, then, in block %bar,
; passes the address of field 1 and the loaded value to @bar_double.
; Expected output: a d-register ldr using the "[xN, #32]!" pre-index form.
202define void @load-pre-indexed-double(%struct.double* %ptr) nounwind {
203; CHECK-LABEL: load-pre-indexed-double
204; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
205entry:
206  %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
207  %add = load double, double* %a, align 8
208  br label %bar
209bar:
210  %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
211  tail call void @bar_double(%s.double* %c, double %add)
212  ret void
213}
214
; Stores %val to field (1, 0) of %struct.double, then, in block %bar, passes
; the address of field 1 and %val to @bar_double.
; Expected output: a d-register str using the "[xN, #32]!" pre-index form.
215define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind {
216; CHECK-LABEL: store-pre-indexed-double
217; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
218entry:
219  %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
220  store double %val, double* %a, align 8
221  br label %bar
222bar:
223  %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
224  tail call void @bar_double(%s.double* %c, double %val)
225  ret void
226}
227
228; Check the following transform:
229;
230; (ldp|stp) w1, w2, [x0, #32]
231;  ...
232; add x0, x0, #32
233;  ->
234; (ldp|stp) w1, w2, [x0, #32]!
235;
236
; Loads both i32 members of the %s.word payload (fields (1, 0) and (1, 1)),
; adds them, then, in block %bar, passes the address of field 1 and the sum
; to @bar_word.
; Expected output: one ldp on x0 using the "#32" pre-index form, with no
; "add x0, x0, #32" appearing after it.
237define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind {
238; CHECK-LABEL: load-pair-pre-indexed-word
239; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
240; CHECK-NOT: add x0, x0, #32
241entry:
242  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
243  %a1 = load i32, i32* %a, align 4
244  %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
245  %b1 = load i32, i32* %b, align 4
246  %add = add i32 %a1, %b1
247  br label %bar
248bar:
249  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
250  tail call void @bar_word(%s.word* %c, i32 %add)
251  ret void
252}
253
; Stores %val to both i32 members of the %s.word payload (fields (1, 0) and
; (1, 1)), then, in block %bar, passes the address of field 1 and %val to
; @bar_word.
; Expected output: one stp on x0 using the "#32" pre-index form, with no
; "add x0, x0, #32" appearing after it.
254define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
255; CHECK-LABEL: store-pair-pre-indexed-word
256; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
257; CHECK-NOT: add x0, x0, #32
258entry:
259  %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
260  store i32 %val, i32* %a, align 4
261  %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
262  store i32 %val, i32* %b, align 4
263  br label %bar
264bar:
265  %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
266  tail call void @bar_word(%s.word* %c, i32 %val)
267  ret void
268}
269
270; Check the following transform:
271;
272; add x8, x8, #16
273;  ...
274; ldr X, [x8]
275;  ->
276; ldr X, [x8, #16]!
277;
278; with X being either w0, x0, s0, d0 or q0.
279
; Struct types for the phi-selected pre-index tests that follow. Each one
; starts with an i32 and is followed by several fields of the type under test.
280%pre.struct.i32 = type { i32, i32, i32, i32, i32}
281%pre.struct.i64 = type { i32, i64, i64, i64, i64}
282%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>}
283%pre.struct.float = type { i32, float, float, float}
284%pre.struct.double = type { i32, double, double, double}
285
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; The phi of the two addresses feeds the i32 load that is returned.
; Expected output: a w-register ldr using the "[xN, #4]!" pre-index form.
286define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
287                                   %pre.struct.i32* %load2) nounwind {
288; CHECK-LABEL: load-pre-indexed-word2
289; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
290  br i1 %cond, label %if.then, label %if.end
291if.then:
292  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
293  %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
294  br label %return
295if.end:
296  %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2
297  br label %return
298return:
299  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
300  %ret = load i32, i32* %retptr
301  ret i32 %ret
302}
303
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; The phi of the two addresses feeds the i64 load that is returned.
; Expected output: an x-register ldr using the "[xN, #8]!" pre-index form.
304define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
305                                         %pre.struct.i64* %load2) nounwind {
306; CHECK-LABEL: load-pre-indexed-doubleword2
307; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
308  br i1 %cond, label %if.then, label %if.end
309if.then:
310  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
311  %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
312  br label %return
313if.end:
314  %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2
315  br label %return
316return:
317  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
318  %ret = load i64, i64* %retptr
319  ret i64 %ret
320}
321
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; The phi of the two addresses feeds the <2 x i64> load that is returned.
; Expected output: a q-register ldr using the "[xN, #16]!" pre-index form.
322define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
323                                             %pre.struct.i128* %load2) nounwind {
324; CHECK-LABEL: load-pre-indexed-quadword2
325; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
326  br i1 %cond, label %if.then, label %if.end
327if.then:
328  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
329  %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
330  br label %return
331if.end:
332  %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2
333  br label %return
334return:
335  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
336  %ret = load <2 x i64>, <2 x i64>* %retptr
337  ret <2 x i64> %ret
338}
339
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; The phi of the two addresses feeds the float load that is returned.
; Expected output: an s-register ldr using the "[xN, #4]!" pre-index form.
340define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
341                                      %pre.struct.float* %load2) nounwind {
342; CHECK-LABEL: load-pre-indexed-float2
343; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
344  br i1 %cond, label %if.then, label %if.end
345if.then:
346  %load1 = load %pre.struct.float*, %pre.struct.float** %this
347  %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
348  br label %return
349if.end:
350  %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2
351  br label %return
352return:
353  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
354  %ret = load float, float* %retptr
355  ret float %ret
356}
357
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; The phi of the two addresses feeds the double load that is returned.
; Expected output: a d-register ldr using the "[xN, #8]!" pre-index form.
358define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
359                                        %pre.struct.double* %load2) nounwind {
360; CHECK-LABEL: load-pre-indexed-double2
361; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
362  br i1 %cond, label %if.then, label %if.end
363if.then:
364  %load1 = load %pre.struct.double*, %pre.struct.double** %this
365  %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
366  br label %return
367if.end:
368  %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2
369  br label %return
370return:
371  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
372  %ret = load double, double* %retptr
373  ret double %ret
374}
375
; Same shape as load-pre-indexed-word2 but with higher field indices:
; %if.then uses field 3 of the struct loaded through %this, %if.end uses
; field 4 of %load2; the phi of the two addresses feeds the returned load.
; Expected output: a w-register ldr using the "[xN, #12]!" pre-index form.
376define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
377                                   %pre.struct.i32* %load2) nounwind {
378; CHECK-LABEL: load-pre-indexed-word3
379; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
380  br i1 %cond, label %if.then, label %if.end
381if.then:
382  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
383  %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3
384  br label %return
385if.end:
386  %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4
387  br label %return
388return:
389  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
390  %ret = load i32, i32* %retptr
391  ret i32 %ret
392}
393
; Same shape as load-pre-indexed-doubleword2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; the phi of the two addresses feeds the returned load.
; Expected output: an x-register ldr using the "[xN, #16]!" pre-index form.
394define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
395                                         %pre.struct.i64* %load2) nounwind {
396; CHECK-LABEL: load-pre-indexed-doubleword3
397; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]!
398  br i1 %cond, label %if.then, label %if.end
399if.then:
400  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
401  %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2
402  br label %return
403if.end:
404  %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3
405  br label %return
406return:
407  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
408  %ret = load i64, i64* %retptr
409  ret i64 %ret
410}
411
; Same shape as load-pre-indexed-quadword2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; the phi of the two addresses feeds the returned load.
; Expected output: a q-register ldr using the "[xN, #32]!" pre-index form.
412define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
413                                             %pre.struct.i128* %load2) nounwind {
414; CHECK-LABEL: load-pre-indexed-quadword3
415; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
416  br i1 %cond, label %if.then, label %if.end
417if.then:
418  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
419  %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2
420  br label %return
421if.end:
422  %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3
423  br label %return
424return:
425  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
426  %ret = load <2 x i64>, <2 x i64>* %retptr
427  ret <2 x i64> %ret
428}
429
; Same shape as load-pre-indexed-float2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; the phi of the two addresses feeds the returned load.
; Expected output: an s-register ldr using the "[xN, #8]!" pre-index form.
430define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
431                                      %pre.struct.float* %load2) nounwind {
432; CHECK-LABEL: load-pre-indexed-float3
433; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
434  br i1 %cond, label %if.then, label %if.end
435if.then:
436  %load1 = load %pre.struct.float*, %pre.struct.float** %this
437  %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2
438  br label %return
439if.end:
440  %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3
441  br label %return
442return:
443  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
444  %ret = load float, float* %retptr
445  ret float %ret
446}
447
; Same shape as load-pre-indexed-double2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; the phi of the two addresses feeds the returned load.
; Expected output: a d-register ldr using the "[xN, #16]!" pre-index form.
448define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
449                                        %pre.struct.double* %load2) nounwind {
450; CHECK-LABEL: load-pre-indexed-double3
451; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
452  br i1 %cond, label %if.then, label %if.end
453if.then:
454  %load1 = load %pre.struct.double*, %pre.struct.double** %this
455  %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2
456  br label %return
457if.end:
458  %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3
459  br label %return
460return:
461  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
462  %ret = load double, double* %retptr
463  ret double %ret
464}
465
466; Check the following transform:
467;
468; add x8, x8, #16
469;  ...
470; str X, [x8]
471;  ->
472; str X, [x8, #16]!
473;
474; with X being either w0, x0, s0, d0 or q0.
475
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; %val is stored through the phi of the two addresses.
; Expected output: a w-register str using the "[xN, #4]!" pre-index form.
476define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
477                                     %pre.struct.i32* %load2,
478                                     i32 %val) nounwind {
479; CHECK-LABEL: store-pre-indexed-word2
480; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
481  br i1 %cond, label %if.then, label %if.end
482if.then:
483  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
484  %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
485  br label %return
486if.end:
487  %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2
488  br label %return
489return:
490  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
491  store i32 %val, i32* %retptr
492  ret void
493}
494
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; %val is stored through the phi of the two addresses.
; Expected output: an x-register str using the "[xN, #8]!" pre-index form.
495define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
496                                           %pre.struct.i64* %load2,
497                                           i64 %val) nounwind {
498; CHECK-LABEL: store-pre-indexed-doubleword2
499; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
500  br i1 %cond, label %if.then, label %if.end
501if.then:
502  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
503  %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
504  br label %return
505if.end:
506  %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2
507  br label %return
508return:
509  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
510  store i64 %val, i64* %retptr
511  ret void
512}
513
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; %val is stored through the phi of the two addresses.
; Expected output: a q-register str using the "[xN, #16]!" pre-index form.
514define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
515                                         %pre.struct.i128* %load2,
516                                         <2 x i64> %val) nounwind {
517; CHECK-LABEL: store-pre-indexed-quadword2
518; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
519  br i1 %cond, label %if.then, label %if.end
520if.then:
521  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
522  %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
523  br label %return
524if.end:
525  %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2
526  br label %return
527return:
528  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
529  store <2 x i64> %val, <2 x i64>* %retptr
530  ret void
531}
532
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; %val is stored through the phi of the two addresses.
; Expected output: an s-register str using the "[xN, #4]!" pre-index form.
533define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
534                                      %pre.struct.float* %load2,
535                                      float %val) nounwind {
536; CHECK-LABEL: store-pre-indexed-float2
537; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
538  br i1 %cond, label %if.then, label %if.end
539if.then:
540  %load1 = load %pre.struct.float*, %pre.struct.float** %this
541  %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
542  br label %return
543if.end:
544  %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2
545  br label %return
546return:
547  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
548  store float %val, float* %retptr
549  ret void
550}
551
; Branches on %cond: %if.then takes the address of field 1 of the struct
; loaded through %this, %if.end takes the address of field 2 of %load2.
; %val is stored through the phi of the two addresses.
; Expected output: a d-register str using the "[xN, #8]!" pre-index form.
552define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
553                                      %pre.struct.double* %load2,
554                                      double %val) nounwind {
555; CHECK-LABEL: store-pre-indexed-double2
556; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
557  br i1 %cond, label %if.then, label %if.end
558if.then:
559  %load1 = load %pre.struct.double*, %pre.struct.double** %this
560  %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
561  br label %return
562if.end:
563  %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2
564  br label %return
565return:
566  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
567  store double %val, double* %retptr
568  ret void
569}
570
; Same shape as store-pre-indexed-word2 but with higher field indices:
; %if.then uses field 3 of the struct loaded through %this, %if.end uses
; field 4 of %load2; %val is stored through the phi of the two addresses.
; Expected output: a w-register str using the "[xN, #12]!" pre-index form.
571define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
572                                     %pre.struct.i32* %load2,
573                                     i32 %val) nounwind {
574; CHECK-LABEL: store-pre-indexed-word3
575; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
576  br i1 %cond, label %if.then, label %if.end
577if.then:
578  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
579  %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3
580  br label %return
581if.end:
582  %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4
583  br label %return
584return:
585  %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
586  store i32 %val, i32* %retptr
587  ret void
588}
589
; Same shape as store-pre-indexed-doubleword2 but with higher field indices:
; %if.then uses field 3 of the struct loaded through %this, %if.end uses
; field 4 of %load2; %val is stored through the phi of the two addresses.
; Expected output: an x-register str using the "[xN, #24]!" pre-index form.
590define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
591                                           %pre.struct.i64* %load2,
592                                           i64 %val) nounwind {
593; CHECK-LABEL: store-pre-indexed-doubleword3
594; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]!
595  br i1 %cond, label %if.then, label %if.end
596if.then:
597  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
598  %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3
599  br label %return
600if.end:
601  %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4
602  br label %return
603return:
604  %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
605  store i64 %val, i64* %retptr
606  ret void
607}
608
; Same shape as store-pre-indexed-quadword2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; %val is stored through the phi of the two addresses.
; Expected output: a q-register str using the "[xN, #32]!" pre-index form.
609define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
610                                         %pre.struct.i128* %load2,
611                                         <2 x i64> %val) nounwind {
612; CHECK-LABEL: store-pre-indexed-quadword3
613; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
614  br i1 %cond, label %if.then, label %if.end
615if.then:
616  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
617  %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2
618  br label %return
619if.end:
620  %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3
621  br label %return
622return:
623  %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
624  store <2 x i64> %val, <2 x i64>* %retptr
625  ret void
626}
627
; Same shape as store-pre-indexed-float2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; %val is stored through the phi of the two addresses.
; Expected output: an s-register str using the "[xN, #8]!" pre-index form.
628define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
629                                      %pre.struct.float* %load2,
630                                      float %val) nounwind {
631; CHECK-LABEL: store-pre-indexed-float3
632; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
633  br i1 %cond, label %if.then, label %if.end
634if.then:
635  %load1 = load %pre.struct.float*, %pre.struct.float** %this
636  %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2
637  br label %return
638if.end:
639  %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3
640  br label %return
641return:
642  %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
643  store float %val, float* %retptr
644  ret void
645}
646
; Same shape as store-pre-indexed-double2 but with higher field indices:
; %if.then uses field 2 of the struct loaded through %this, %if.end uses
; field 3 of %load2; %val is stored through the phi of the two addresses.
; Expected output: a d-register str using the "[xN, #16]!" pre-index form.
647define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
648                                      %pre.struct.double* %load2,
649                                      double %val) nounwind {
650; CHECK-LABEL: store-pre-indexed-double3
651; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
652  br i1 %cond, label %if.then, label %if.end
653if.then:
654  %load1 = load %pre.struct.double*, %pre.struct.double** %this
655  %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2
656  br label %return
657if.end:
658  %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3
659  br label %return
660return:
661  %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
662  store double %val, double* %retptr
663  ret void
664}
665
666; Check the following transform:
667;
668; ldr X, [x20]
669;  ...
670; add x20, x20, #32
671;  ->
672; ldr X, [x20], #32
673;
674; with X being either w0, x0, s0, d0 or q0.
675
; Loop over an i8 buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-byte, then advances %iv2 by 4 elements and adds -4 to the counter,
; exiting when the counter reaches zero.
; Expected output: an ldrb using the post-index form with "#4".
676define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind {
677; CHECK-LABEL: load-post-indexed-byte
678; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4
679entry:
680  %gep1 = getelementptr i8, i8* %array, i64 2
681  br label %body
682
683body:
684  %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
685  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
686  %gep2 = getelementptr i8, i8* %iv2, i64 -1
687  %load = load i8, i8* %gep2
688  call void @use-byte(i8 %load)
689  %load2 = load i8, i8* %iv2
690  call void @use-byte(i8 %load2)
691  %iv.next = add i64 %iv, -4
692  %gep3 = getelementptr i8, i8* %iv2, i64 4
693  %cond = icmp eq i64 %iv.next, 0
694  br i1 %cond, label %exit, label %body
695
696exit:
697  ret void
698}
699
; Loop over an i16 buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-halfword, then advances %iv2 by 4 elements and adds -4 to the counter,
; exiting when the counter reaches zero.
; Expected output: an ldrh using the post-index form with "#8".
700define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind {
701; CHECK-LABEL: load-post-indexed-halfword
702; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8
703entry:
704  %gep1 = getelementptr i16, i16* %array, i64 2
705  br label %body
706
707body:
708  %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
709  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
710  %gep2 = getelementptr i16, i16* %iv2, i64 -1
711  %load = load i16, i16* %gep2
712  call void @use-halfword(i16 %load)
713  %load2 = load i16, i16* %iv2
714  call void @use-halfword(i16 %load2)
715  %iv.next = add i64 %iv, -4
716  %gep3 = getelementptr i16, i16* %iv2, i64 4
717  %cond = icmp eq i64 %iv.next, 0
718  br i1 %cond, label %exit, label %body
719
720exit:
721  ret void
722}
723
; Loop over an i32 buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-word, then advances %iv2 by 4 elements and adds -4 to the counter,
; exiting when the counter reaches zero.
; Expected output: a w-register ldr using the post-index form with "#16".
724define void @load-post-indexed-word(i32* %array, i64 %count) nounwind {
725; CHECK-LABEL: load-post-indexed-word
726; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
727entry:
728  %gep1 = getelementptr i32, i32* %array, i64 2
729  br label %body
730
731body:
732  %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
733  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
734  %gep2 = getelementptr i32, i32* %iv2, i64 -1
735  %load = load i32, i32* %gep2
736  call void @use-word(i32 %load)
737  %load2 = load i32, i32* %iv2
738  call void @use-word(i32 %load2)
739  %iv.next = add i64 %iv, -4
740  %gep3 = getelementptr i32, i32* %iv2, i64 4
741  %cond = icmp eq i64 %iv.next, 0
742  br i1 %cond, label %exit, label %body
743
744exit:
745  ret void
746}
747
; Loop over an i64 buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-doubleword, then advances %iv2 by 4 elements and adds -4 to the
; counter, exiting when the counter reaches zero.
; Expected output: an x-register ldr using the post-index form with "#32".
748define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind {
749; CHECK-LABEL: load-post-indexed-doubleword
750; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32
751entry:
752  %gep1 = getelementptr i64, i64* %array, i64 2
753  br label %body
754
755body:
756  %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
757  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
758  %gep2 = getelementptr i64, i64* %iv2, i64 -1
759  %load = load i64, i64* %gep2
760  call void @use-doubleword(i64 %load)
761  %load2 = load i64, i64* %iv2
762  call void @use-doubleword(i64 %load2)
763  %iv.next = add i64 %iv, -4
764  %gep3 = getelementptr i64, i64* %iv2, i64 4
765  %cond = icmp eq i64 %iv.next, 0
766  br i1 %cond, label %exit, label %body
767
768exit:
769  ret void
770}
771
; Loop over a <2 x i64> buffer starting at %array + 2 elements. Each
; iteration loads the element just before %iv2 and the element at %iv2,
; passes each to @use-quadword, then advances %iv2 by 4 elements and adds -4
; to the counter, exiting when the counter reaches zero.
; Expected output: a q-register ldr using the post-index form with "#64".
772define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind {
773; CHECK-LABEL: load-post-indexed-quadword
774; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64
775entry:
776  %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
777  br label %body
778
779body:
780  %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
781  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
782  %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
783  %load = load <2 x i64>, <2 x i64>* %gep2
784  call void @use-quadword(<2 x i64> %load)
785  %load2 = load <2 x i64>, <2 x i64>* %iv2
786  call void @use-quadword(<2 x i64> %load2)
787  %iv.next = add i64 %iv, -4
788  %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
789  %cond = icmp eq i64 %iv.next, 0
790  br i1 %cond, label %exit, label %body
791
792exit:
793  ret void
794}
795
; Loop over a float buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-float, then advances %iv2 by 4 elements and adds -4 to the counter,
; exiting when the counter reaches zero.
; Expected output: an s-register ldr using the post-index form with "#16".
796define void @load-post-indexed-float(float* %array, i64 %count) nounwind {
797; CHECK-LABEL: load-post-indexed-float
798; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16
799entry:
800  %gep1 = getelementptr float, float* %array, i64 2
801  br label %body
802
803body:
804  %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
805  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
806  %gep2 = getelementptr float, float* %iv2, i64 -1
807  %load = load float, float* %gep2
808  call void @use-float(float %load)
809  %load2 = load float, float* %iv2
810  call void @use-float(float %load2)
811  %iv.next = add i64 %iv, -4
812  %gep3 = getelementptr float, float* %iv2, i64 4
813  %cond = icmp eq i64 %iv.next, 0
814  br i1 %cond, label %exit, label %body
815
816exit:
817  ret void
818}
819
; Loop over a double buffer starting at %array + 2 elements. Each iteration
; loads the element just before %iv2 and the element at %iv2, passes each to
; @use-double, then advances %iv2 by 4 elements and adds -4 to the counter,
; exiting when the counter reaches zero.
; Expected output: a d-register ldr using the post-index form with "#32".
820define void @load-post-indexed-double(double* %array, i64 %count) nounwind {
821; CHECK-LABEL: load-post-indexed-double
822; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32
823entry:
824  %gep1 = getelementptr double, double* %array, i64 2
825  br label %body
826
827body:
828  %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
829  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
830  %gep2 = getelementptr double, double* %iv2, i64 -1
831  %load = load double, double* %gep2
832  call void @use-double(double %load)
833  %load2 = load double, double* %iv2
834  call void @use-double(double %load2)
835  %iv.next = add i64 %iv, -4
836  %gep3 = getelementptr double, double* %iv2, i64 4
837  %cond = icmp eq i64 %iv.next, 0
838  br i1 %cond, label %exit, label %body
839
840exit:
841  ret void
842}
843
844; Check the following transform:
845;
846; str X, [x20]
847;  ...
848; add x20, x20, #32
849;  ->
850; str X, [x20], #32
851;
852; with X being either w0, x0, s0, d0 or q0.
853
854define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind {
855; CHECK-LABEL: store-post-indexed-byte
856; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4
; Loop that loads the byte before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 bytes. The expected output
; folds that increment into a post-indexed strb with a #4 writeback.
857entry:
  ; Start two elements in, so the loop body can also address element -1.
858  %gep1 = getelementptr i8, i8* %array, i64 2
859  br label %body
860
861body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
862  %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
863  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
864  %gep2 = getelementptr i8, i8* %iv2, i64 -1
865  %load = load i8, i8* %gep2
866  call void @use-byte(i8 %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
867  store i8 %val, i8* %iv2
868  %iv.next = add i64 %iv, -4
  ; 4 elements * 1 byte = the 4-byte writeback in the expected strb.
869  %gep3 = getelementptr i8, i8* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
870  %cond = icmp eq i64 %iv.next, 0
871  br i1 %cond, label %exit, label %body
872
873exit:
874  ret void
875}
876
877define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind {
878; CHECK-LABEL: store-post-indexed-halfword
879; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8
; Loop that loads the i16 before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 halfwords (8 bytes). The
; expected output is a post-indexed strh with a #8 writeback.
880entry:
  ; Start two elements in, so the loop body can also address element -1.
881  %gep1 = getelementptr i16, i16* %array, i64 2
882  br label %body
883
884body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
885  %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
886  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
887  %gep2 = getelementptr i16, i16* %iv2, i64 -1
888  %load = load i16, i16* %gep2
889  call void @use-halfword(i16 %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
890  store i16 %val, i16* %iv2
891  %iv.next = add i64 %iv, -4
  ; 4 elements * 2 bytes = the 8-byte writeback in the expected strh.
892  %gep3 = getelementptr i16, i16* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
893  %cond = icmp eq i64 %iv.next, 0
894  br i1 %cond, label %exit, label %body
895
896exit:
897  ret void
898}
899
900define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
901; CHECK-LABEL: store-post-indexed-word
902; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
; Loop that loads the i32 before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 words (16 bytes). The
; expected output is a post-indexed str of a w register with a #16 writeback.
903entry:
  ; Start two elements in, so the loop body can also address element -1.
904  %gep1 = getelementptr i32, i32* %array, i64 2
905  br label %body
906
907body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
908  %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
909  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
910  %gep2 = getelementptr i32, i32* %iv2, i64 -1
911  %load = load i32, i32* %gep2
912  call void @use-word(i32 %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
913  store i32 %val, i32* %iv2
914  %iv.next = add i64 %iv, -4
  ; 4 elements * 4 bytes = the 16-byte writeback in the expected str.
915  %gep3 = getelementptr i32, i32* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
916  %cond = icmp eq i64 %iv.next, 0
917  br i1 %cond, label %exit, label %body
918
919exit:
920  ret void
921}
922
923define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind {
924; CHECK-LABEL: store-post-indexed-doubleword
925; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
; Loop that loads the i64 before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 doublewords (32 bytes). The
; expected output is a post-indexed str of an x register with a #32 writeback.
926entry:
  ; Start two elements in, so the loop body can also address element -1.
927  %gep1 = getelementptr i64, i64* %array, i64 2
928  br label %body
929
930body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
931  %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
932  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
933  %gep2 = getelementptr i64, i64* %iv2, i64 -1
934  %load = load i64, i64* %gep2
935  call void @use-doubleword(i64 %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
936  store i64 %val, i64* %iv2
937  %iv.next = add i64 %iv, -4
  ; 4 elements * 8 bytes = the 32-byte writeback in the expected str.
938  %gep3 = getelementptr i64, i64* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
939  %cond = icmp eq i64 %iv.next, 0
940  br i1 %cond, label %exit, label %body
941
942exit:
943  ret void
944}
945
946define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind {
947; CHECK-LABEL: store-post-indexed-quadword
948; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
; Loop that loads the <2 x i64> before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 vectors (64 bytes). The
; expected output is a post-indexed str of a q register with a #64 writeback.
949entry:
  ; Start two elements in, so the loop body can also address element -1.
950  %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
951  br label %body
952
953body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
954  %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
955  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
956  %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
957  %load = load <2 x i64>, <2 x i64>* %gep2
958  call void @use-quadword(<2 x i64> %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
959  store <2 x i64> %val, <2 x i64>* %iv2
960  %iv.next = add i64 %iv, -4
  ; 4 elements * 16 bytes = the 64-byte writeback in the expected str.
961  %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
962  %cond = icmp eq i64 %iv.next, 0
963  br i1 %cond, label %exit, label %body
964
965exit:
966  ret void
967}
968
969define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind {
970; CHECK-LABEL: store-post-indexed-float
971; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
; Loop that loads the float before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 floats (16 bytes). The
; expected output is a post-indexed str of an s register with a #16 writeback.
972entry:
  ; Start two elements in, so the loop body can also address element -1.
973  %gep1 = getelementptr float, float* %array, i64 2
974  br label %body
975
976body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
977  %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
978  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
979  %gep2 = getelementptr float, float* %iv2, i64 -1
980  %load = load float, float* %gep2
981  call void @use-float(float %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
982  store float %val, float* %iv2
983  %iv.next = add i64 %iv, -4
  ; 4 elements * 4 bytes = the 16-byte writeback in the expected str.
984  %gep3 = getelementptr float, float* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
985  %cond = icmp eq i64 %iv.next, 0
986  br i1 %cond, label %exit, label %body
987
988exit:
989  ret void
990}
991
992define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind {
993; CHECK-LABEL: store-post-indexed-double
994; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
; Loop that loads the double before the running pointer, stores %val at the
; running pointer, then advances the pointer by 4 doubles (32 bytes). The
; expected output is a post-indexed str of a d register with a #32 writeback.
995entry:
  ; Start two elements in, so the loop body can also address element -1.
996  %gep1 = getelementptr double, double* %array, i64 2
997  br label %body
998
999body:
  ; %iv2 is the running pointer; %iv is a counter stepping down from %count.
1000  %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
1001  %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
  ; Load at offset -1 and hand the value to an external function.
1002  %gep2 = getelementptr double, double* %iv2, i64 -1
1003  %load = load double, double* %gep2
1004  call void @use-double(double %load)
  ; Store at offset 0: the access that can absorb the pointer increment below.
1005  store double %val, double* %iv2
1006  %iv.next = add i64 %iv, -4
  ; 4 elements * 8 bytes = the 32-byte writeback in the expected str.
1007  %gep3 = getelementptr double, double* %iv2, i64 4
  ; Exit only when the counter reaches exactly 0.
1008  %cond = icmp eq i64 %iv.next, 0
1009  br i1 %cond, label %exit, label %body
1010
1011exit:
1012  ret void
1013}
1014
; External sinks, one per value type, called by the post-indexed load/store
; loop tests in this file with the values they load. They are declarations
; only; no definition is visible in this file.
1015declare void @use-byte(i8)
1016declare void @use-halfword(i16)
1017declare void @use-word(i32)
1018declare void @use-doubleword(i64)
1019declare void @use-quadword(<2 x i64>)
1020declare void @use-float(float)
1021declare void @use-double(double)
1022
1023; Check the following transform:
1024;
1025; stp w0, w1, [x20]
1026;  ...
1027; add x20, x20, #32
1028;  ->
1029; stp w0, w1, [x20], #32
1030
1031define void @store-pair-post-indexed-word() nounwind {
1032; CHECK-LABEL: store-pair-post-indexed-word
1033; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16
1034; CHECK: ret
; Copies both i32 fields of one stack slot (%src, never written before it is
; read) into a second stack slot (%dst). The expected output is a post-indexed
; stp of two w registers based on sp with a #16 writeback, followed by ret.
; NOTE(review): the two 8-byte allocas total 16 bytes, which matches the #16
; writeback -- presumably the stack deallocation folded into the store; confirm.
1035  %src = alloca { i32, i32 }, align 8
1036  %dst = alloca { i32, i32 }, align 8
1037
  ; Read both fields of the source pair.
1038  %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0
1039  %src.real = load i32, i32* %src.realp
1040  %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1
1041  %src.imag = load i32, i32* %src.imagp
1042
  ; Write them to the adjacent fields of the destination pair.
1043  %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0
1044  %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1
1045  store i32 %src.real, i32* %dst.realp
1046  store i32 %src.imag, i32* %dst.imagp
1047  ret void
1048}
1049
1050define void @store-pair-post-indexed-doubleword() nounwind {
1051; CHECK-LABEL: store-pair-post-indexed-doubleword
1052; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32
1053; CHECK: ret
; Copies both i64 fields of one stack slot (%src, never written before it is
; read) into a second stack slot (%dst). The expected output is a post-indexed
; stp of two x registers based on sp with a #32 writeback, followed by ret.
; NOTE(review): the two 16-byte allocas total 32 bytes, which matches the #32
; writeback -- presumably the stack deallocation folded into the store; confirm.
1054  %src = alloca { i64, i64 }, align 8
1055  %dst = alloca { i64, i64 }, align 8
1056
  ; Read both fields of the source pair.
1057  %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0
1058  %src.real = load i64, i64* %src.realp
1059  %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1
1060  %src.imag = load i64, i64* %src.imagp
1061
  ; Write them to the adjacent fields of the destination pair.
1062  %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0
1063  %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1
1064  store i64 %src.real, i64* %dst.realp
1065  store i64 %src.imag, i64* %dst.imagp
1066  ret void
1067}
1068
1069define void @store-pair-post-indexed-float() nounwind {
1070; CHECK-LABEL: store-pair-post-indexed-float
1071; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16
1072; CHECK: ret
; Copies both float fields of one stack slot (%src, never written before it is
; read) into a second stack slot (%dst). The expected output is a post-indexed
; stp of two s registers based on sp with a #16 writeback, followed by ret.
; NOTE(review): the two 8-byte allocas total 16 bytes, which matches the #16
; writeback -- presumably the stack deallocation folded into the store; confirm.
1073  %src = alloca { float, float }, align 8
1074  %dst = alloca { float, float }, align 8
1075
  ; Read both fields of the source pair.
1076  %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0
1077  %src.real = load float, float* %src.realp
1078  %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1
1079  %src.imag = load float, float* %src.imagp
1080
  ; Write them to the adjacent fields of the destination pair.
1081  %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0
1082  %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1
1083  store float %src.real, float* %dst.realp
1084  store float %src.imag, float* %dst.imagp
1085  ret void
1086}
1087
1088define void @store-pair-post-indexed-double() nounwind {
1089; CHECK-LABEL: store-pair-post-indexed-double
1090; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32
1091; CHECK: ret
; Copies both double fields of one stack slot (%src, never written before it
; is read) into a second stack slot (%dst). The expected output is a
; post-indexed stp of two d registers based on sp with a #32 writeback,
; followed by ret.
; NOTE(review): the two 16-byte allocas total 32 bytes, which matches the #32
; writeback -- presumably the stack deallocation folded into the store; confirm.
1092  %src = alloca { double, double }, align 8
1093  %dst = alloca { double, double }, align 8
1094
  ; Read both fields of the source pair.
1095  %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
1096  %src.real = load double, double* %src.realp
1097  %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
1098  %src.imag = load double, double* %src.imagp
1099
  ; Write them to the adjacent fields of the destination pair.
1100  %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
1101  %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
1102  store double %src.real, double* %dst.realp
1103  store double %src.imag, double* %dst.imagp
1104  ret void
1105}
1106
1107; Check the following transform:
1108;
1109; (ldr|str) X, [x20]
1110;  ...
1111; sub x20, x20, #16
1112;  ->
1113; (ldr|str) X, [x20], #-16
1114;
1115; with X being either w0, x0, s0, d0 or q0.
1116
1117define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
1118; CHECK-LABEL: post-indexed-sub-word
1119; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
1120; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
; Backwards copy loop: each iteration copies the i32 at offset -1 and the i32
; at offset 0 from the %b-based pointer to the %a-based pointer, then moves
; both pointers back by 2 elements (8 bytes). The expected output folds each
; decrement into a post-indexed ldr/str with a #-8 writeback.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1121  br label %for.body
1122for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1123  %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
1124  %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
1125  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset -1.
1126  %gep1 = getelementptr i32, i32* %phi1, i64 -1
1127  %load1 = load i32, i32* %gep1
1128  %gep2 = getelementptr i32, i32* %phi2, i64 -1
1129  store i32 %load1, i32* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1130  %load2 = load i32, i32* %phi1
1131  store i32 %load2, i32* %phi2
1132  %dec.i = add nsw i64 %i, -1
  ; -2 elements * 4 bytes = the -8 writeback in the expected output.
1133  %gep3 = getelementptr i32, i32* %phi2, i64 -2
1134  %gep4 = getelementptr i32, i32* %phi1, i64 -2
  ; Keep looping while the decremented counter is still positive.
1135  %cond = icmp sgt i64 %dec.i, 0
1136  br i1 %cond, label %for.body, label %end
1137end:
1138  ret void
1139}
1140
1141define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
1142; CHECK-LABEL: post-indexed-sub-doubleword
1143; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
1144; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
; Backwards copy loop: each iteration copies the i64 at offset -1 and the i64
; at offset 0 from the %b-based pointer to the %a-based pointer, then moves
; both pointers back by 2 elements (16 bytes). The expected output folds each
; decrement into a post-indexed ldr/str with a #-16 writeback.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1145  br label %for.body
1146for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1147  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1148  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1149  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset -1.
1150  %gep1 = getelementptr i64, i64* %phi1, i64 -1
1151  %load1 = load i64, i64* %gep1
1152  %gep2 = getelementptr i64, i64* %phi2, i64 -1
1153  store i64 %load1, i64* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1154  %load2 = load i64, i64* %phi1
1155  store i64 %load2, i64* %phi2
1156  %dec.i = add nsw i64 %i, -1
  ; -2 elements * 8 bytes = the -16 writeback in the expected output.
1157  %gep3 = getelementptr i64, i64* %phi2, i64 -2
1158  %gep4 = getelementptr i64, i64* %phi1, i64 -2
  ; Keep looping while the decremented counter is still positive.
1159  %cond = icmp sgt i64 %dec.i, 0
1160  br i1 %cond, label %for.body, label %end
1161end:
1162  ret void
1163}
1164
1165define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
1166; CHECK-LABEL: post-indexed-sub-quadword
1167; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
1168; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
; Backwards copy loop: each iteration copies the <2 x i64> at offset -1 and
; the one at offset 0 from the %b-based pointer to the %a-based pointer, then
; moves both pointers back by 2 elements (32 bytes). The expected output folds
; each decrement into a post-indexed ldr/str of a q register with #-32.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1169  br label %for.body
1170for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1171  %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
1172  %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
1173  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset -1.
1174  %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1
1175  %load1 = load <2 x i64>, <2 x i64>* %gep1
1176  %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1
1177  store <2 x i64> %load1, <2 x i64>* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1178  %load2 = load <2 x i64>, <2 x i64>* %phi1
1179  store <2 x i64> %load2, <2 x i64>* %phi2
1180  %dec.i = add nsw i64 %i, -1
  ; -2 elements * 16 bytes = the -32 writeback in the expected output.
1181  %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2
1182  %gep4 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -2
  ; Keep looping while the decremented counter is still positive.
1183  %cond = icmp sgt i64 %dec.i, 0
1184  br i1 %cond, label %for.body, label %end
1185end:
1186  ret void
1187}
1188
1189define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
1190; CHECK-LABEL: post-indexed-sub-float
1191; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
1192; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
; Backwards copy loop: each iteration copies the float at offset -1 and the
; float at offset 0 from the %b-based pointer to the %a-based pointer, then
; moves both pointers back by 2 elements (8 bytes). The expected output folds
; each decrement into a post-indexed ldr/str of an s register with #-8.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1193  br label %for.body
1194for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1195  %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
1196  %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
1197  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset -1.
1198  %gep1 = getelementptr float, float* %phi1, i64 -1
1199  %load1 = load float, float* %gep1
1200  %gep2 = getelementptr float, float* %phi2, i64 -1
1201  store float %load1, float* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1202  %load2 = load float, float* %phi1
1203  store float %load2, float* %phi2
1204  %dec.i = add nsw i64 %i, -1
  ; -2 elements * 4 bytes = the -8 writeback in the expected output.
1205  %gep3 = getelementptr float, float* %phi2, i64 -2
1206  %gep4 = getelementptr float, float* %phi1, i64 -2
  ; Keep looping while the decremented counter is still positive.
1207  %cond = icmp sgt i64 %dec.i, 0
1208  br i1 %cond, label %for.body, label %end
1209end:
1210  ret void
1211}
1212
1213define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
1214; CHECK-LABEL: post-indexed-sub-double
1215; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
1216; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
; Backwards copy loop: each iteration copies the double at offset -1 and the
; double at offset 0 from the %b-based pointer to the %a-based pointer, then
; moves both pointers back by 2 elements (16 bytes). The expected output folds
; each decrement into a post-indexed ldr/str of a d register with #-16.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1217  br label %for.body
1218for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1219  %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
1220  %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
1221  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset -1.
1222  %gep1 = getelementptr double, double* %phi1, i64 -1
1223  %load1 = load double, double* %gep1
1224  %gep2 = getelementptr double, double* %phi2, i64 -1
1225  store double %load1, double* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1226  %load2 = load double, double* %phi1
1227  store double %load2, double* %phi2
1228  %dec.i = add nsw i64 %i, -1
  ; -2 elements * 8 bytes = the -16 writeback in the expected output.
1229  %gep3 = getelementptr double, double* %phi2, i64 -2
1230  %gep4 = getelementptr double, double* %phi1, i64 -2
  ; Keep looping while the decremented counter is still positive.
1231  %cond = icmp sgt i64 %dec.i, 0
1232  br i1 %cond, label %for.body, label %end
1233end:
1234  ret void
1235}
1236
1237define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
1238; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
1239; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
1240; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
; Boundary test for the writeback immediate: the loop copies the i64 at
; offsets +1 and 0 and then steps both pointers back by 32 elements, i.e.
; -256 bytes. -256 is the lowest value of the signed 9-bit post-index
; immediate of ldr/str, and the expected output still uses the post-indexed
; form for both the load and the store.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1241  br label %for.body
1242for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1243  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1244  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1245  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset +1.
1246  %gep1 = getelementptr i64, i64* %phi1, i64 1
1247  %load1 = load i64, i64* %gep1
1248  %gep2 = getelementptr i64, i64* %phi2, i64 1
1249  store i64 %load1, i64* %gep2
  ; Copy the element at offset 0: these accesses can absorb the decrement.
1250  %load2 = load i64, i64* %phi1
1251  store i64 %load2, i64* %phi2
1252  %dec.i = add nsw i64 %i, -1
  ; -32 elements * 8 bytes = -256 bytes.
1253  %gep3 = getelementptr i64, i64* %phi2, i64 -32
1254  %gep4 = getelementptr i64, i64* %phi1, i64 -32
  ; Keep looping while the decremented counter is still positive.
1255  %cond = icmp sgt i64 %dec.i, 0
1256  br i1 %cond, label %for.body, label %end
1257end:
1258  ret void
1259}
1260
1261define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
1262; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
1263; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
1264; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
1265; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
1266; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
1267
; Negative test: the loop copies the i64 at offsets +1 and 0 and then steps
; both pointers forward by 32 elements, i.e. +256 bytes. That is one past the
; +255 upper bound of the signed 9-bit post-index immediate of ldr/str, so the
; expected output keeps plain ldr/str, each followed by a separate add of #256.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1268  br label %for.body
1269for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1270  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1271  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1272  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Copy the element at offset +1.
1273  %gep1 = getelementptr i64, i64* %phi1, i64 1
1274  %load1 = load i64, i64* %gep1
1275  %gep2 = getelementptr i64, i64* %phi2, i64 1
1276  store i64 %load1, i64* %gep2
  ; Copy the element at offset 0.
1277  %load2 = load i64, i64* %phi1
1278  store i64 %load2, i64* %phi2
1279  %dec.i = add nsw i64 %i, -1
  ; 32 elements * 8 bytes = +256 bytes.
1280  %gep3 = getelementptr i64, i64* %phi2, i64 32
1281  %gep4 = getelementptr i64, i64* %phi1, i64 32
  ; Keep looping while the decremented counter is still positive.
1282  %cond = icmp sgt i64 %dec.i, 0
1283  br i1 %cond, label %for.body, label %end
1284end:
1285  ret void
1286}
1287
1288define void @post-indexed-paired-min-offset(i64* %a, i64* %b, i64 %count) nounwind {
1289; CHECK-LABEL: post-indexed-paired-min-offset
1290; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
1291; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
; Boundary test for paired accesses: both loads are issued before both stores,
; each pair touching offsets 0 and +1, and then both pointers step back by
; 64 elements, i.e. -512 bytes. -512 is the lowest value of the scaled signed
; 7-bit immediate of a 64-bit ldp/stp, and the expected output is a
; post-indexed ldp and stp with a #-512 writeback.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1292  br label %for.body
1293for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1294  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1295  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1296  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Two adjacent loads (offsets +1 and 0) from the source pointer.
1297  %gep1 = getelementptr i64, i64* %phi1, i64 1
1298  %load1 = load i64, i64* %gep1
1299  %gep2 = getelementptr i64, i64* %phi2, i64 1
1300  %load2 = load i64, i64* %phi1
  ; Two adjacent stores to the same offsets of the destination pointer.
1301  store i64 %load1, i64* %gep2
1302  store i64 %load2, i64* %phi2
1303  %dec.i = add nsw i64 %i, -1
  ; -64 elements * 8 bytes = -512 bytes.
1304  %gep3 = getelementptr i64, i64* %phi2, i64 -64
1305  %gep4 = getelementptr i64, i64* %phi1, i64 -64
  ; Keep looping while the decremented counter is still positive.
1306  %cond = icmp sgt i64 %dec.i, 0
1307  br i1 %cond, label %for.body, label %end
1308end:
1309  ret void
1310}
1311
1312define void @post-indexed-paired-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
1313; CHECK-LABEL: post-indexed-paired-offset-out-of-range
1314; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
1315; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
1316; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
1317; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
; Negative test for paired accesses: both pointers step forward by 64
; elements, i.e. +512 bytes. That exceeds the +504 upper bound of the scaled
; signed 7-bit immediate of a 64-bit ldp/stp, so the expected output keeps
; plain ldp/stp, each followed by a separate add of #512.
; The entry block is unnamed, hence the %0 predecessor in the phis.
1318  br label %for.body
1319for.body:
  ; %phi1 is the source pointer, %phi2 the destination pointer, %i the counter.
1320  %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1321  %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1322  %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
  ; Unlike the min-offset test, %load1 reads offset 0 and %load2 reads offset
  ; +1, so the stores below write the two elements in swapped positions.
1323  %gep1 = getelementptr i64, i64* %phi1, i64 1
1324  %load1 = load i64, i64* %phi1
1325  %gep2 = getelementptr i64, i64* %phi2, i64 1
1326  %load2 = load i64, i64* %gep1
1327  store i64 %load1, i64* %gep2
1328  store i64 %load2, i64* %phi2
1329  %dec.i = add nsw i64 %i, -1
  ; 64 elements * 8 bytes = +512 bytes.
1330  %gep3 = getelementptr i64, i64* %phi2, i64 64
1331  %gep4 = getelementptr i64, i64* %phi1, i64 64
  ; Keep looping while the decremented counter is still positive.
1332  %cond = icmp sgt i64 %dec.i, 0
1333  br i1 %cond, label %for.body, label %end
1334end:
1335  ret void
1336}
1337
1338; DAGCombiner::MergeConsecutiveStores merges this into a vector store,
1339; replaceZeroVectorStore should split the vector store back into
1340; scalar stores which should get merged by AArch64LoadStoreOptimizer.
1341define void @merge_zr32(i32* %p) {
1342; CHECK-LABEL: merge_zr32:
1343; CHECK: // %entry
1344; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
1345; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1346; CHECK-NEXT: ret
; Two adjacent i32 zero stores (p[0] and p[1], 8 bytes in total).
; Expected without +strict-align: one 64-bit store of xzr.
; Expected with +strict-align: one stp of two wzr registers.
; In both configurations the store must be immediately followed by ret.
1347entry:
1348  store i32 0, i32* %p
1349  %p1 = getelementptr i32, i32* %p, i32 1
1350  store i32 0, i32* %p1
1351  ret void
1352}
1353
1354; Same as merge_zr32 but the merged stores should also get paired.
1355define void @merge_zr32_2(i32* %p) {
1356; CHECK-LABEL: merge_zr32_2:
1357; CHECK: // %entry
1358; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1359; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1360; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
1361; CHECK-NEXT: ret
; Four adjacent i32 zero stores (p[0]..p[3], 16 bytes in total).
; Expected without +strict-align: a single stp of two xzr registers.
; Expected with +strict-align: two stp of wzr pairs, at offsets 0 and 8.
1362entry:
1363  store i32 0, i32* %p
1364  %p1 = getelementptr i32, i32* %p, i32 1
1365  store i32 0, i32* %p1
1366  %p2 = getelementptr i32, i32* %p, i64 2
1367  store i32 0, i32* %p2
1368  %p3 = getelementptr i32, i32* %p, i64 3
1369  store i32 0, i32* %p3
1370  ret void
1371}
1372
1373; Like merge_zr32_2, but checking the largest allowed stp immediate offset.
1374define void @merge_zr32_2_offset(i32* %p) {
1375; CHECK-LABEL: merge_zr32_2_offset:
1376; CHECK: // %entry
1377; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
1378; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504]
1379; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508]
1380; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512]
1381; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516]
1382; CHECK-NEXT: ret
; Four adjacent i32 zero stores at elements 126..129, i.e. byte offsets 504,
; 508, 512 and 516 from %p.
; Expected without +strict-align: a single stp of two xzr registers at #504.
; Expected with +strict-align: four individual str wzr, one per element.
1383entry:
  ; 126 * 4 bytes = byte offset 504.
1384  %p0 = getelementptr i32, i32* %p, i32 126
1385  store i32 0, i32* %p0
1386  %p1 = getelementptr i32, i32* %p, i32 127
1387  store i32 0, i32* %p1
1388  %p2 = getelementptr i32, i32* %p, i64 128
1389  store i32 0, i32* %p2
1390  %p3 = getelementptr i32, i32* %p, i64 129
1391  store i32 0, i32* %p3
1392  ret void
1393}
1394
1395; Like merge_zr32, but replaceZeroVectorStore should not split this
1396; vector store since the address offset is too large for the stp
1397; instruction.
1398define void @no_merge_zr32_2_offset(i32* %p) {
1399; CHECK-LABEL: no_merge_zr32_2_offset:
1400; CHECK: // %entry
1401; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1402; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
1403; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096]
1404; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100]
1405; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104]
1406; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108]
1407; CHECK-NEXT: ret
; Four adjacent i32 zero stores at elements 1024..1027, i.e. byte offsets
; 4096..4108 from %p.
; Expected without +strict-align: a zeroed vector register (movi) stored with
; one str q at #4096, i.e. the vector store is kept rather than split.
; Expected with +strict-align: four individual str wzr, one per element.
1408entry:
  ; 1024 * 4 bytes = byte offset 4096.
1409  %p0 = getelementptr i32, i32* %p, i32 1024
1410  store i32 0, i32* %p0
1411  %p1 = getelementptr i32, i32* %p, i32 1025
1412  store i32 0, i32* %p1
1413  %p2 = getelementptr i32, i32* %p, i64 1026
1414  store i32 0, i32* %p2
1415  %p3 = getelementptr i32, i32* %p, i64 1027
1416  store i32 0, i32* %p3
1417  ret void
1418}
1419
1420; Like merge_zr32, but replaceZeroVectorStore should not split the
1421; vector store since the zero constant vector has multiple uses, so we
1422; err on the side that allows for stp q instruction generation.
1423define void @merge_zr32_3(i32* %p) {
1424; CHECK-LABEL: merge_zr32_3:
1425; CHECK: // %entry
1426; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1427; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
1428; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1429; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
1430; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16]
1431; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24]
1432; CHECK-NEXT: ret
; Eight adjacent i32 zero stores (p[0]..p[7], 32 bytes in total).
; Expected without +strict-align: one zeroed vector register stored twice by
; a single stp of q registers.
; Expected with +strict-align: four stp of wzr pairs at offsets 0, 8, 16, 24.
1433entry:
1434  store i32 0, i32* %p
1435  %p1 = getelementptr i32, i32* %p, i32 1
1436  store i32 0, i32* %p1
1437  %p2 = getelementptr i32, i32* %p, i64 2
1438  store i32 0, i32* %p2
1439  %p3 = getelementptr i32, i32* %p, i64 3
1440  store i32 0, i32* %p3
1441  %p4 = getelementptr i32, i32* %p, i64 4
1442  store i32 0, i32* %p4
1443  %p5 = getelementptr i32, i32* %p, i64 5
1444  store i32 0, i32* %p5
1445  %p6 = getelementptr i32, i32* %p, i64 6
1446  store i32 0, i32* %p6
1447  %p7 = getelementptr i32, i32* %p, i64 7
1448  store i32 0, i32* %p7
1449  ret void
1450}
1451
1452; Like merge_zr32, but with 2-vector type.
1453define void @merge_zr32_2vec(<2 x i32>* %p) {
1454; CHECK-LABEL: merge_zr32_2vec:
1455; CHECK: // %entry
1456; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
1457; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1458; CHECK-NEXT: ret
; A single <2 x i32> zeroinitializer store with no explicit alignment.
; Expected without +strict-align: one 64-bit store of xzr.
; Expected with +strict-align: one stp of two wzr registers.
1459entry:
1460  store <2 x i32> zeroinitializer, <2 x i32>* %p
1461  ret void
1462}
1463
1464; Like merge_zr32, but with 3-vector type.
1465define void @merge_zr32_3vec(<3 x i32>* %p) {
1466; CHECK-LABEL: merge_zr32_3vec:
1467; CHECK: // %entry
1468; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
1469; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
1470; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1471; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
1472; CHECK-NEXT: ret
; A single <3 x i32> zeroinitializer store with no explicit alignment.
; Expected without +strict-align: a 64-bit store of xzr for the first two
; lanes plus a 32-bit store of wzr at #8 for the third.
; Expected with +strict-align: an stp of two wzr registers plus the same
; 32-bit store of wzr at #8.
1473entry:
1474  store <3 x i32> zeroinitializer, <3 x i32>* %p
1475  ret void
1476}
1477
1478; Like merge_zr32, but with 4-vector type.
1479define void @merge_zr32_4vec(<4 x i32>* %p) {
1480; CHECK-LABEL: merge_zr32_4vec:
1481; CHECK: // %entry
1482; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1483; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1484; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
1485; CHECK-NEXT: ret
; A single <4 x i32> zeroinitializer store with no explicit alignment.
; Expected without +strict-align: a single stp of two xzr registers.
; Expected with +strict-align: two stp of wzr pairs, at offsets 0 and 8.
1486entry:
1487  store <4 x i32> zeroinitializer, <4 x i32>* %p
1488  ret void
1489}
1490
1491; Like merge_zr32, but with 2-vector float type.
1492define void @merge_zr32_2vecf(<2 x float>* %p) {
1493; CHECK-LABEL: merge_zr32_2vecf:
1494; CHECK: // %entry
1495; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
1496; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1497; CHECK-NEXT: ret
; A single <2 x float> zeroinitializer store with no explicit alignment.
; The expectations use the integer zero registers even though the element
; type is float: one str xzr without +strict-align, one stp of two wzr
; registers with it.
1498entry:
1499  store <2 x float> zeroinitializer, <2 x float>* %p
1500  ret void
1501}
1502
1503; Like merge_zr32, but with 4-vector float type.
1504define void @merge_zr32_4vecf(<4 x float>* %p) {
1505; CHECK-LABEL: merge_zr32_4vecf:
1506; CHECK: // %entry
1507; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1508; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
1509; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
1510; CHECK-NEXT: ret
; A single <4 x float> zeroinitializer store with no explicit alignment.
; The expectations use the integer zero registers even though the element
; type is float: one stp of two xzr registers without +strict-align, two stp
; of wzr pairs (offsets 0 and 8) with it.
1511entry:
1512  store <4 x float> zeroinitializer, <4 x float>* %p
1513  ret void
1514}
1515
1516; Similar to merge_zr32, but for 64-bit values.
1517define void @merge_zr64(i64* %p) {
1518; CHECK-LABEL: merge_zr64:
1519; CHECK: // %entry
1520; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1521; CHECK-NEXT: ret
; Two adjacent i64 zero stores (p[0] and p[1], 16 bytes in total).
; The expectation is shared by both configurations of this file: a single stp
; of two xzr registers immediately followed by ret.
1522entry:
1523  store i64 0, i64* %p
1524  %p1 = getelementptr i64, i64* %p, i64 1
1525  store i64 0, i64* %p1
1526  ret void
1527}
1528
1529; Similar to merge_zr32, but for 64-bit values and with unaligned stores.
1530define void @merge_zr64_unalign(<2 x i64>* %p) {
1531; CHECK-LABEL: merge_zr64_unalign:
1532; CHECK: // %entry
1533; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1534; STRICTALIGN: strb
1535; STRICTALIGN: strb
1536; STRICTALIGN: strb
1537; STRICTALIGN: strb
1538; STRICTALIGN: strb
1539; STRICTALIGN: strb
1540; STRICTALIGN: strb
1541; STRICTALIGN: strb
1542; STRICTALIGN: strb
1543; STRICTALIGN: strb
1544; STRICTALIGN: strb
1545; STRICTALIGN: strb
1546; STRICTALIGN: strb
1547; STRICTALIGN: strb
1548; STRICTALIGN: strb
1549; STRICTALIGN: strb
1550; CHECK-NEXT: ret
; A single 16-byte <2 x i64> zeroinitializer store marked align 1.
; Expected without +strict-align: still a single stp of two xzr registers.
; Expected with +strict-align: sixteen byte stores (one strb pattern per byte
; of the vector), with ret directly after the last one.
1551entry:
1552  store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1
1553  ret void
1554}
1555
1556; Similar to merge_zr32_3, replaceZeroVectorStore should not split the
1557; vector store since the zero constant vector has multiple uses.
1558define void @merge_zr64_2(i64* %p) {
1559; CHECK-LABEL: merge_zr64_2:
1560; CHECK: // %entry
1561; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1562; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
1563; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1564; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16]
1565; CHECK-NEXT: ret
; Four adjacent i64 zero stores (p[0]..p[3], 32 bytes in total).
; Expected without +strict-align: one zeroed vector register stored twice by
; a single stp of q registers.
; Expected with +strict-align: two stp of xzr pairs, at offsets 0 and 16.
1566entry:
1567  store i64 0, i64* %p
1568  %p1 = getelementptr i64, i64* %p, i64 1
1569  store i64 0, i64* %p1
1570  %p2 = getelementptr i64, i64* %p, i64 2
1571  store i64 0, i64* %p2
1572  %p3 = getelementptr i64, i64* %p, i64 3
1573  store i64 0, i64* %p3
1574  ret void
1575}
1576
1577; Like merge_zr64, but with 2-vector double type.
1578define void @merge_zr64_2vecd(<2 x double>* %p) {
1579; CHECK-LABEL: merge_zr64_2vecd:
1580; CHECK: // %entry
1581; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1582; CHECK-NEXT: ret
; A single <2 x double> zeroinitializer store with no explicit alignment.
; The expectation is shared by both configurations of this file: a single stp
; of two xzr registers immediately followed by ret.
1583entry:
1584  store <2 x double> zeroinitializer, <2 x double>* %p
1585  ret void
1586}
1587
1588; Like merge_zr64, but with 3-vector i64 type.
1589define void @merge_zr64_3vec(<3 x i64>* %p) {
1590; CHECK-LABEL: merge_zr64_3vec:
1591; CHECK: // %entry
1592; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1593; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
1594; CHECK-NEXT: ret
; A single <3 x i64> zeroinitializer store with no explicit alignment.
; The expectation is shared by both configurations of this file: an stp of
; two xzr registers for the first two lanes, then a str xzr at #16 for the
; third lane, then ret.
1595entry:
1596  store <3 x i64> zeroinitializer, <3 x i64>* %p
1597  ret void
1598}
1599
1600; Like merge_zr64_2, but with 4-vector double type.
1601define void @merge_zr64_4vecd(<4 x double>* %p) {
1602; CHECK-LABEL: merge_zr64_4vecd:
1603; CHECK: // %entry
1604; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1605; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
1606; CHECK-NEXT: ret
; A single 32-byte <4 x double> zeroinitializer store with no explicit
; alignment. The expectation is shared by both configurations of this file:
; one zeroed vector register (movi) stored twice by a single stp of q
; registers, then ret.
1607entry:
1608  store <4 x double> zeroinitializer, <4 x double>* %p
1609  ret void
1610}
1611
1612; Verify that non-consecutive merges do not generate q0
1613define void @merge_multiple_128bit_stores(i64* %p) {
1614; CHECK-LABEL: merge_multiple_128bit_stores
1615; CHECK: // %entry
1616; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1617; NOSTRICTALIGN-NEXT: str q0, [x0]
1618; NOSTRICTALIGN-NEXT: stur q0, [x0, #24]
1619; NOSTRICTALIGN-NEXT: str q0, [x0, #48]
1620; STRICTALIGN-NEXT: stp xzr, xzr, [x0]
1621; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #24]
1622; STRICTALIGN-NEXT: stp xzr, xzr, [x0, #48]
1623; CHECK-NEXT: ret
; Six i64 zero stores forming three 16-byte pairs with one-element gaps
; between them: elements {0,1}, {3,4} and {6,7}, i.e. byte offsets 0, 24, 48.
; Expected without +strict-align: one zeroed vector register and three
; separate 128-bit stores (the middle one is stur because #24 is not a
; multiple of 16); the q stores are not combined into an stp.
; Expected with +strict-align: three stp of xzr pairs at the same offsets.
; NOTE(review): the REG capture above is never used because the following
; lines hard-code q0/x0, unlike the sibling tests; also the comment preceding
; this function says q0 should not be generated while q0 is expected here --
; confirm the intended wording.
1624entry:
1625  store i64 0, i64* %p
1626  %p1 = getelementptr i64, i64* %p, i64 1
1627  store i64 0, i64* %p1
  ; Element 2 is skipped.
1628  %p3 = getelementptr i64, i64* %p, i64 3
1629  store i64 0, i64* %p3
1630  %p4 = getelementptr i64, i64* %p, i64 4
1631  store i64 0, i64* %p4
  ; Element 5 is skipped.
1632  %p6 = getelementptr i64, i64* %p, i64 6
1633  store i64 0, i64* %p6
1634  %p7 = getelementptr i64, i64* %p, i64 7
1635  store i64 0, i64* %p7
1636  ret void
1637}
1638
1639; Verify that large stores generate stp q
1640define void @merge_multiple_128bit_stores_consec(i64* %p) {
1641; CHECK-LABEL: merge_multiple_128bit_stores_consec
1642; CHECK: // %entry
1643; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
1644; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
1645; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}, #32]
1646; STRICTALIGN-NEXT: stp	 xzr, xzr, [x0]
1647; STRICTALIGN-NEXT: stp	 xzr, xzr, [x0, #16]
1648; STRICTALIGN-NEXT: stp	 xzr, xzr, [x0, #32]
1649; STRICTALIGN-NEXT: stp  xzr, xzr, [x0, #48]
1650; CHECK-NEXT: ret
; Eight adjacent i64 zero stores (p[0]..p[7], 64 bytes in total).
; Expected without +strict-align: one zeroed vector register and two stp of q
; registers, at offsets 0 and 32.
; Expected with +strict-align: four stp of xzr pairs at offsets 0, 16, 32, 48.
1651entry:
1652  store i64 0, i64* %p
1653  %p1 = getelementptr i64, i64* %p, i64 1
1654  store i64 0, i64* %p1
1655  %p2 = getelementptr i64, i64* %p, i64 2
1656  store i64 0, i64* %p2
1657  %p3 = getelementptr i64, i64* %p, i64 3
1658  store i64 0, i64* %p3
1659  %p4 = getelementptr i64, i64* %p, i64 4
1660  store i64 0, i64* %p4
1661  %p5 = getelementptr i64, i64* %p, i64 5
1662  store i64 0, i64* %p5
1663  %p6 = getelementptr i64, i64* %p, i64 6
1664  store i64 0, i64* %p6
1665  %p7 = getelementptr i64, i64* %p, i64 7
1666  store i64 0, i64* %p7
1667  ret void
1668}
1669
1670; Check for bug 34674 where invalid add of xzr was being generated.
1671; CHECK-LABEL: bug34674:
1672; CHECK: // %entry
1673; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
1674; CHECK-DAG: stp xzr, xzr, [x0]
1675; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
; The function zeroes a <2 x i64> at %p, reloads its first i64 through a
; bitcast pointer, adds 1 and returns the sum.
; The expectations require xzr to be copied into a general register first
; (captured as ZREG) and the add to take that copy as its source operand
; rather than xzr itself. The stp of the zero pair and the add may appear in
; either order (the two -DAG lines).
1676define i64 @bug34674(<2 x i64>* %p) {
1677entry:
1678  store <2 x i64> zeroinitializer, <2 x i64>* %p
  ; Reinterpret the vector pointer as a pointer to its first i64.
1679  %p2 = bitcast <2 x i64>* %p to i64*
1680  %ld = load i64, i64* %p2
1681  %add = add i64 %ld, 1
1682  ret i64 %add
1683}
1684