; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; rdar://9428579

; Single-member wrapper structs around NEON vector types; %type1 and %type2
; are the pointee types used by @t1 and @t2.
; NOTE(review): %type3 is not referenced by any function visible here.
%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }


; Store a zero <16 x i8> through a %type1* loaded from %argtable. The CHECK
; lines expect the pointer load followed by a q-register store to that pointer.
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1*, %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

; Store a zero <8 x i8> through a %type2* loaded from %argtable. The CHECK
; lines expect the pointer load followed by a d-register store to that pointer.
define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2*, %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; Add a bunch of tests for rdar://11246289

; Global pointers to vector arrays, one per vector type. The fct1_*/fct2_*
; tests load these pointers and use them as the base of their stores.
; NOTE(review): the four floatglobalArray* globals are not referenced by any
; function visible in this section of the file.
@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

; Register-offset addressing for <2 x i64>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray64x2.
; The CHECK lines expect the offset to be scaled once (lsl #4) and the scaled
; register reused as the index of both the q-register load and the store.
define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; The closing ']' is matched explicitly so that a scaled or extended register
; operand (e.g. "[x0, x8, lsl #4]") cannot satisfy this check.
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; Immediate-offset addressing for <2 x i64>: element 3 of %array (byte offset
; 48) is copied to element 5 (byte offset 80) of the array pointed to by
; @globalArray64x2. Both offsets are expected as immediates on q-register
; ldr/str.
define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; Register-offset addressing for <4 x i32>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray32x4.
; The offset is expected to be scaled once (lsl #4) and reused for both the
; q-register load and the store.
define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

; Immediate-offset addressing for <4 x i32>: element 3 of %array (byte offset
; 48) is copied to element 5 (byte offset 80) of the array pointed to by
; @globalArray32x4, using q-register ldr/str with immediate offsets.
define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

; Register-offset addressing for <8 x i16>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray16x8.
; The offset is expected to be scaled once (lsl #4) and reused for both the
; q-register load and the store.
define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

; Immediate-offset addressing for <8 x i16>: element 3 of %array (byte offset
; 48) is copied to element 5 (byte offset 80) of the array pointed to by
; @globalArray16x8, using q-register ldr/str with immediate offsets.
define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

; Register-offset addressing for <16 x i8>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray8x16.
; The offset is expected to be scaled once (lsl #4) and reused for both the
; q-register load and the store.
define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; Immediate-offset addressing for <16 x i8>: element 3 of %array (byte offset
; 48) is copied to element 5 (byte offset 80) of the array pointed to by
; @globalArray8x16, using q-register ldr/str with immediate offsets.
define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; Register-offset addressing for <1 x i64>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray64x1.
; The offset is expected to be scaled once (lsl #3) and reused for both the
; d-register load and the store.
define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; Immediate-offset addressing for <1 x i64>: element 3 of %array (byte offset
; 24) is copied to element 5 (byte offset 40) of the array pointed to by
; @globalArray64x1, using d-register ldr/str with immediate offsets.
define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; Register-offset addressing for <2 x i32>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray32x2.
; The offset is expected to be scaled once (lsl #3) and reused for both the
; d-register load and the store.
define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

; Immediate-offset addressing for <2 x i32>: element 3 of %array (byte offset
; 24) is copied to element 5 (byte offset 40) of the array pointed to by
; @globalArray32x2, using d-register ldr/str with immediate offsets.
define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

; Register-offset addressing for <4 x i16>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray16x4.
; The offset is expected to be scaled once (lsl #3) and reused for both the
; d-register load and the store.
define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

; Immediate-offset addressing for <4 x i16>: element 3 of %array (byte offset
; 24) is copied to element 5 (byte offset 40) of the array pointed to by
; @globalArray16x4, using d-register ldr/str with immediate offsets.
define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

; Register-offset addressing for <8 x i8>: element %offset of %array is
; copied to element %offset of the array pointed to by @globalArray8x8.
; The offset is expected to be scaled once (lsl #3) and reused for both the
; d-register load and the store.
define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses.
; Byte array used as the base object by the LDUR/STUR tests: they access it
; at byte offsets 3 and 4 through pointers bitcast to vector types.
@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1

; Load a <1 x i64> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a d register with immediate #3.
define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  ret <1 x i64> %0
}

; Load a <2 x i32> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a d register with immediate #3.
define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  ret <2 x i32> %0
}

; Load a <4 x i16> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a d register with immediate #3.
define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  ret <4 x i16> %0
}

; Load a <8 x i8> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a d register with immediate #3.
define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  ret <8 x i8> %0
}

; Load a <2 x i64> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a q register with immediate #3.
define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  ret <2 x i64> %0
}

; Load a <4 x i32> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a q register with immediate #3.
define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  ret <4 x i32> %0
}

; Load a <8 x i16> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a q register with immediate #3.
define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  ret <8 x i16> %0
}

; Load a <16 x i8> from byte offset 3 of @str. The CHECK line expects the
; unscaled form: ldur into a q register with immediate #3.
define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  ret <16 x i8> %0
}

; Copy a <1 x i64> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same d register and the same base
; register, with immediates #3 and #4.
define void @fct8() nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
  ret void
}

; Copy a <2 x i32> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same d register and the same base
; register, with immediates #3 and #4.
define void @fct9() nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
  ret void
}

; Copy a <4 x i16> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same d register and the same base
; register, with immediates #3 and #4.
define void @fct10() nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
  ret void
}

; Copy a <8 x i8> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same d register and the same base
; register, with immediates #3 and #4.
define void @fct11() nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
  ret void
}

; Copy a <2 x i64> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same q register and the same base
; register, with immediates #3 and #4.
define void @fct12() nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
  ret void
}

; Copy a <4 x i32> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same q register and the same base
; register, with immediates #3 and #4.
define void @fct13() nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
  ret void
}

; Copy a <8 x i16> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same q register and the same base
; register, with immediates #3 and #4.
define void @fct14() nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
  ret void
}

; Copy a <16 x i8> from byte offset 3 of @str to byte offset 4. The CHECK
; lines expect an ldur/stur pair on the same q register and the same base
; register, with immediates #3 and #4.
define void @fct15() nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
  ret void
}

; Check the building of a vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
; Insert an i8 loaded from %sp0 + 1 into lane 0 of a <8 x i8> and multiply
; the vector by itself. The CHECK lines expect the scalar to be loaded
; straight into a b register (immediate #1), immediately followed by mul.8b.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

; Insert an i8 loaded from %sp0 + 1 into lane 0 of a <16 x i8> and multiply
; the vector by itself. The CHECK lines expect the scalar to be loaded
; straight into a b register (immediate #1), immediately followed by mul.16b.
define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

; Insert an i16 loaded from element 1 of %sp0 (byte offset 2) into lane 0 of
; a <4 x i16> and multiply the vector by itself. The CHECK lines expect the
; scalar to be loaded straight into an h register, immediately followed by
; mul.4h.
define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

; Insert an i16 loaded from element 1 of %sp0 (byte offset 2) into lane 0 of
; a <8 x i16> and multiply the vector by itself. The CHECK lines expect the
; scalar to be loaded straight into an h register, immediately followed by
; mul.8h.
define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

; Insert an i32 loaded from element 1 of %sp0 (byte offset 4) into lane 0 of
; a <2 x i32> and multiply the vector by itself. The CHECK lines expect the
; scalar to be loaded straight into an s register, immediately followed by
; mul.2s.
define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

; Insert an i32 loaded from element 1 of %sp0 (byte offset 4) into lane 0 of
; a <4 x i32> and multiply the vector by itself. The CHECK lines expect the
; scalar to be loaded straight into an s register, immediately followed by
; mul.4s.
define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

; Build a <1 x i64> from an i64 loaded from element 1 of %sp0 (byte offset 8)
; and return it. The CHECK line expects the value to be loaded directly into
; d0 with an immediate offset.
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

; Insert an i64 loaded from element 1 of %sp0 (byte offset 8) into lane 0 of
; a <2 x i64> and return it. The CHECK line expects the value to be loaded
; directly into a d register with an immediate offset.
define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}

;
; Single loads with register offset.
; Insert an i8 loaded from %sp0 + %offset into lane 0 of a <8 x i8> and
; multiply the vector by itself. The CHECK lines expect a register-offset
; load into a b register, immediately followed by mul.8b.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

; Insert an i8 loaded from %sp0 + %offset into lane 0 of a <16 x i8> and
; multiply the vector by itself. The CHECK lines expect a register-offset
; load into a b register, immediately followed by mul.16b.
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

; Insert an i16 loaded from element %offset of %sp0 into lane 0 of a
; <4 x i16> and multiply the vector by itself. The CHECK lines expect a
; register-offset load (index scaled with lsl #1) into an h register,
; immediately followed by mul.4h.
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

; Insert an i16 loaded from element %offset of %sp0 into lane 0 of a
; <8 x i16> and multiply the vector by itself. The CHECK lines expect a
; register-offset load (index scaled with lsl #1) into an h register,
; immediately followed by mul.8h.
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

; Insert an i32 loaded from element %offset of %sp0 into lane 0 of a
; <2 x i32> and multiply the vector by itself. The CHECK lines expect a
; register-offset load (index scaled with lsl #2) into an s register,
; immediately followed by mul.2s.
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

; Insert an i32 loaded from element %offset of %sp0 into lane 0 of a
; <4 x i32> and multiply the vector by itself. The CHECK lines expect a
; register-offset load (index scaled with lsl #2) into an s register,
; immediately followed by mul.4s.
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

; Build a <1 x i64> from an i64 loaded from element %offset of %sp0 and
; return it. The CHECK line expects a register-offset load (index scaled with
; lsl #3) directly into d0.
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

; Insert an i64 loaded from element %offset of %sp0 into lane 0 of a
; <2 x i64> and return it. The CHECK line expects a register-offset load
; (index scaled with lsl #3) directly into d0.
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}
