1; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2
3; rdar://9428579
4
; Single-field wrapper structs around NEON vector types. %type1 and %type2 are
; the pointee types of the zero-store tests @t1 and @t2; %type3 is not
; referenced by any function in the portion of the file visible here.
5%type1 = type { <16 x i8> }
6%type2 = type { <8 x i8> }
7%type3 = type { <4 x i16> }
8
9
; Stores an all-zero <16 x i8> into field 0 of the %type1 object whose address
; is loaded from %argtable. The checks expect the pointer to be loaded into an
; x register and then used as the base of a `str q0`.
10define hidden fastcc void @t1(%type1** %argtable) nounwind {
11entry:
12; CHECK-LABEL: t1:
13; CHECK: ldr x[[REG:[0-9]+]], [x0]
14; CHECK: str q0, [x[[REG]]]
15  %tmp1 = load %type1*, %type1** %argtable, align 8
16  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
17  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
18  ret void
19}
20
; 64-bit counterpart of @t1: stores an all-zero <8 x i8> into field 0 of the
; %type2 object whose address is loaded from %argtable. The checks expect the
; pointer load into an x register followed by a `str d0` through it.
21define hidden fastcc void @t2(%type2** %argtable) nounwind {
22entry:
23; CHECK-LABEL: t2:
24; CHECK: ldr x[[REG:[0-9]+]], [x0]
25; CHECK: str d0, [x[[REG]]]
26  %tmp1 = load %type2*, %type2** %argtable, align 8
27  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
28  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
29  ret void
30}
31
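32; Add a bunch of tests for rdar://11246289: vector loads and stores addressed with a register offset (fct1_*) or an immediate offset (fct2_*).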
33
; Global pointers to vector arrays, one per vector type. Each fct1_*/fct2_*
; test loads the matching pointer and uses it as the destination base of its
; store.
; 128-bit integer vector element types.
34@globalArray64x2 = common global <2 x i64>* null, align 8
35@globalArray32x4 = common global <4 x i32>* null, align 8
36@globalArray16x8 = common global <8 x i16>* null, align 8
37@globalArray8x16 = common global <16 x i8>* null, align 8
; 64-bit integer vector element types.
38@globalArray64x1 = common global <1 x i64>* null, align 8
39@globalArray32x2 = common global <2 x i32>* null, align 8
40@globalArray16x4 = common global <4 x i16>* null, align 8
41@globalArray8x8 = common global <8 x i8>* null, align 8
; Floating-point vector element types. None of these four is referenced by a
; function in the portion of the file visible here.
42@floatglobalArray64x2 = common global <2 x double>* null, align 8
43@floatglobalArray32x4 = common global <4 x float>* null, align 8
44@floatglobalArray64x1 = common global <1 x double>* null, align 8
45@floatglobalArray32x2 = common global <2 x float>* null, align 8
46
; Copies element %offset of %array to element %offset of the <2 x i64> array
; pointed to by @globalArray64x2. The checks expect the index to be scaled once
; (`lsl #4`, i.e. 16-byte elements) and that same shifted register to be used as
; the register offset of both the q-register load and the q-register store.
; The load check matches the complete `[x0, <reg>]` operand, closing bracket
; included, like the other fct1_* tests.
47define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
48entry:
49; CHECK-LABEL: fct1_64x2:
50; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
51; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
52; CHECK: ldr [[BASE:x[0-9]+]],
53; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
54  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
55  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
56  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
57  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
58  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
59  ret void
60}
61
; Copies element 3 of %array to element 5 of the <2 x i64> array pointed to by
; @globalArray64x2. The checks expect the constant indices to be folded into
; immediate offsets: #48 (3 * 16 bytes) on the load and #80 (5 * 16 bytes) on
; the store.
62define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
63entry:
64; CHECK-LABEL: fct2_64x2:
65; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
66; CHECK: ldr [[BASE:x[0-9]+]],
67; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
68  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
69  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
70  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
71  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
72  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
73  ret void
74}
75
; Copies element %offset of %array to element %offset of the <4 x i32> array
; pointed to by @globalArray32x4. The checks expect one `lsl #4` of the index,
; reused as the register offset of both the q-register load and store.
76define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
77entry:
78; CHECK-LABEL: fct1_32x4:
79; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
80; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
81; CHECK: ldr [[BASE:x[0-9]+]],
82; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
83  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
84  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
85  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
86  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
87  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
88  ret void
89}
90
; Copies element 3 of %array to element 5 of the <4 x i32> array pointed to by
; @globalArray32x4. The checks expect immediate offsets #48 (3 * 16 bytes) on
; the load and #80 (5 * 16 bytes) on the store.
91define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
92entry:
93; CHECK-LABEL: fct2_32x4:
94; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
95; CHECK: ldr [[BASE:x[0-9]+]],
96; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
97  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
98  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
99  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
100  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
101  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
102  ret void
103}
104
; Copies element %offset of %array to element %offset of the <8 x i16> array
; pointed to by @globalArray16x8. The checks expect one `lsl #4` of the index,
; reused as the register offset of both the q-register load and store.
105define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
106entry:
107; CHECK-LABEL: fct1_16x8:
108; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
109; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
110; CHECK: ldr [[BASE:x[0-9]+]],
111; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
112  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
113  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
114  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
115  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
116  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
117  ret void
118}
119
; Copies element 3 of %array to element 5 of the <8 x i16> array pointed to by
; @globalArray16x8. The checks expect immediate offsets #48 (3 * 16 bytes) on
; the load and #80 (5 * 16 bytes) on the store.
120define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
121entry:
122; CHECK-LABEL: fct2_16x8:
123; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
124; CHECK: ldr [[BASE:x[0-9]+]],
125; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
126  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
127  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
128  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
129  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
130  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
131  ret void
132}
133
; Copies element %offset of %array to element %offset of the <16 x i8> array
; pointed to by @globalArray8x16. The checks expect one `lsl #4` of the index,
; reused as the register offset of both the q-register load and store.
134define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
135entry:
136; CHECK-LABEL: fct1_8x16:
137; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
138; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
139; CHECK: ldr [[BASE:x[0-9]+]],
140; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
141  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
142  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
143  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
144  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
145  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
146  ret void
147}
148
; Copies element 3 of %array to element 5 of the <16 x i8> array pointed to by
; @globalArray8x16. The checks expect immediate offsets #48 (3 * 16 bytes) on
; the load and #80 (5 * 16 bytes) on the store.
149define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
150entry:
151; CHECK-LABEL: fct2_8x16:
152; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
153; CHECK: ldr [[BASE:x[0-9]+]],
154; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
155  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
156  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
157  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
158  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
159  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
160  ret void
161}
162
; Copies element %offset of %array to element %offset of the <1 x i64> array
; pointed to by @globalArray64x1. The checks expect one `lsl #3` of the index
; (8-byte elements), reused as the register offset of both the d-register load
; and store.
163define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
164entry:
165; CHECK-LABEL: fct1_64x1:
166; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
167; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
168; CHECK: ldr [[BASE:x[0-9]+]],
169; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
170  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
171  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
172  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
173  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
174  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
175  ret void
176}
177
; Copies element 3 of %array to element 5 of the <1 x i64> array pointed to by
; @globalArray64x1. The checks expect immediate offsets #24 (3 * 8 bytes) on
; the load and #40 (5 * 8 bytes) on the store.
178define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
179entry:
180; CHECK-LABEL: fct2_64x1:
181; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
182; CHECK: ldr [[BASE:x[0-9]+]],
183; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
184  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
185  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
186  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
187  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
188  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
189  ret void
190}
191
; Copies element %offset of %array to element %offset of the <2 x i32> array
; pointed to by @globalArray32x2. The checks expect one `lsl #3` of the index,
; reused as the register offset of both the d-register load and store.
192define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
193entry:
194; CHECK-LABEL: fct1_32x2:
195; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
196; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
197; CHECK: ldr [[BASE:x[0-9]+]],
198; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
199  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
200  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
201  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
202  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
203  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
204  ret void
205}
206
; Copies element 3 of %array to element 5 of the <2 x i32> array pointed to by
; @globalArray32x2. The checks expect immediate offsets #24 (3 * 8 bytes) on
; the load and #40 (5 * 8 bytes) on the store.
207define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
208entry:
209; CHECK-LABEL: fct2_32x2:
210; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
211; CHECK: ldr [[BASE:x[0-9]+]],
212; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
213  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
214  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
215  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
216  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
217  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
218  ret void
219}
220
; Copies element %offset of %array to element %offset of the <4 x i16> array
; pointed to by @globalArray16x4. The checks expect one `lsl #3` of the index,
; reused as the register offset of both the d-register load and store.
221define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
222entry:
223; CHECK-LABEL: fct1_16x4:
224; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
225; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
226; CHECK: ldr [[BASE:x[0-9]+]],
227; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
228  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
229  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
230  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
231  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
232  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
233  ret void
234}
235
; Copies element 3 of %array to element 5 of the <4 x i16> array pointed to by
; @globalArray16x4. The checks expect immediate offsets #24 (3 * 8 bytes) on
; the load and #40 (5 * 8 bytes) on the store.
236define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
237entry:
238; CHECK-LABEL: fct2_16x4:
239; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
240; CHECK: ldr [[BASE:x[0-9]+]],
241; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
242  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
243  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
244  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
245  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
246  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
247  ret void
248}
249
; Copies element %offset of %array to element %offset of the <8 x i8> array
; pointed to by @globalArray8x8. The checks expect one `lsl #3` of the index,
; reused as the register offset of both the d-register load and store.
; NOTE(review): the other vector types in this group each have a constant-index
; fct2_* companion; no fct2_8x8 appears in the portion of the file visible here.
250define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
251entry:
252; CHECK-LABEL: fct1_8x8:
253; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
254; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
255; CHECK: ldr [[BASE:x[0-9]+]],
256; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
257  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
258  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
259  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
260  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
261  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
262  ret void
263}
264
265; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
266; registers for unscaled vector accesses
267
; Loads a <1 x i64> from byte offset 3 of %str and returns it. Offset 3 is not
; a multiple of the 8-byte access size, so the check expects the unscaled form
; `ldur` into a d register with immediate #3.
268define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
269entry:
270; CHECK-LABEL: fct0:
271; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
272  %p = getelementptr inbounds i8, i8* %str, i64 3
273  %q = bitcast i8* %p to <1 x i64>*
274  %0 = load <1 x i64>, <1 x i64>* %q, align 8
275  ret <1 x i64> %0
276}
277
; Loads a <2 x i32> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a d register with immediate #3.
278define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
279entry:
280; CHECK-LABEL: fct1:
281; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
282  %p = getelementptr inbounds i8, i8* %str, i64 3
283  %q = bitcast i8* %p to <2 x i32>*
284  %0 = load <2 x i32>, <2 x i32>* %q, align 8
285  ret <2 x i32> %0
286}
287
; Loads a <4 x i16> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a d register with immediate #3.
288define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
289entry:
290; CHECK-LABEL: fct2:
291; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
292  %p = getelementptr inbounds i8, i8* %str, i64 3
293  %q = bitcast i8* %p to <4 x i16>*
294  %0 = load <4 x i16>, <4 x i16>* %q, align 8
295  ret <4 x i16> %0
296}
297
; Loads a <8 x i8> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a d register with immediate #3.
298define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
299entry:
300; CHECK-LABEL: fct3:
301; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
302  %p = getelementptr inbounds i8, i8* %str, i64 3
303  %q = bitcast i8* %p to <8 x i8>*
304  %0 = load <8 x i8>, <8 x i8>* %q, align 8
305  ret <8 x i8> %0
306}
307
; Loads a <2 x i64> from byte offset 3 of %str and returns it. Offset 3 is not
; a multiple of the 16-byte access size, so the check expects the unscaled form
; `ldur` into a q register with immediate #3.
308define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
309entry:
310; CHECK-LABEL: fct4:
311; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
312  %p = getelementptr inbounds i8, i8* %str, i64 3
313  %q = bitcast i8* %p to <2 x i64>*
314  %0 = load <2 x i64>, <2 x i64>* %q, align 16
315  ret <2 x i64> %0
316}
317
; Loads a <4 x i32> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a q register with immediate #3.
318define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
319entry:
320; CHECK-LABEL: fct5:
321; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
322  %p = getelementptr inbounds i8, i8* %str, i64 3
323  %q = bitcast i8* %p to <4 x i32>*
324  %0 = load <4 x i32>, <4 x i32>* %q, align 16
325  ret <4 x i32> %0
326}
327
; Loads a <8 x i16> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a q register with immediate #3.
328define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
329entry:
330; CHECK-LABEL: fct6:
331; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
332  %p = getelementptr inbounds i8, i8* %str, i64 3
333  %q = bitcast i8* %p to <8 x i16>*
334  %0 = load <8 x i16>, <8 x i16>* %q, align 16
335  ret <8 x i16> %0
336}
337
; Loads a <16 x i8> from byte offset 3 of %str and returns it. The check
; expects an unscaled `ldur` into a q register with immediate #3.
338define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
339entry:
340; CHECK-LABEL: fct7:
341; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
342  %p = getelementptr inbounds i8, i8* %str, i64 3
343  %q = bitcast i8* %p to <16 x i8>*
344  %0 = load <16 x i8>, <16 x i8>* %q, align 16
345  ret <16 x i8> %0
346}
347
; Loads a <1 x i64> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect an unscaled `ldur` (#3) and `stur` (#4) that share the same
; base register and the same d data register.
348define void @fct8(i8* %str) nounwind ssp {
349entry:
350; CHECK-LABEL: fct8:
351; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
352; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
353  %p = getelementptr inbounds i8, i8* %str, i64 3
354  %q = bitcast i8* %p to <1 x i64>*
355  %0 = load <1 x i64>, <1 x i64>* %q, align 8
356  %p2 = getelementptr inbounds i8, i8* %str, i64 4
357  %q2 = bitcast i8* %p2 to <1 x i64>*
358  store <1 x i64> %0, <1 x i64>* %q2, align 8
359  ret void
360}
361
; Loads a <2 x i32> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one d data register.
362define void @fct9(i8* %str) nounwind ssp {
363entry:
364; CHECK-LABEL: fct9:
365; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
366; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
367  %p = getelementptr inbounds i8, i8* %str, i64 3
368  %q = bitcast i8* %p to <2 x i32>*
369  %0 = load <2 x i32>, <2 x i32>* %q, align 8
370  %p2 = getelementptr inbounds i8, i8* %str, i64 4
371  %q2 = bitcast i8* %p2 to <2 x i32>*
372  store <2 x i32> %0, <2 x i32>* %q2, align 8
373  ret void
374}
375
; Loads a <4 x i16> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one d data register.
376define void @fct10(i8* %str) nounwind ssp {
377entry:
378; CHECK-LABEL: fct10:
379; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
380; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
381  %p = getelementptr inbounds i8, i8* %str, i64 3
382  %q = bitcast i8* %p to <4 x i16>*
383  %0 = load <4 x i16>, <4 x i16>* %q, align 8
384  %p2 = getelementptr inbounds i8, i8* %str, i64 4
385  %q2 = bitcast i8* %p2 to <4 x i16>*
386  store <4 x i16> %0, <4 x i16>* %q2, align 8
387  ret void
388}
389
; Loads a <8 x i8> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one d data register.
390define void @fct11(i8* %str) nounwind ssp {
391entry:
392; CHECK-LABEL: fct11:
393; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
394; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
395  %p = getelementptr inbounds i8, i8* %str, i64 3
396  %q = bitcast i8* %p to <8 x i8>*
397  %0 = load <8 x i8>, <8 x i8>* %q, align 8
398  %p2 = getelementptr inbounds i8, i8* %str, i64 4
399  %q2 = bitcast i8* %p2 to <8 x i8>*
400  store <8 x i8> %0, <8 x i8>* %q2, align 8
401  ret void
402}
403
; Loads a <2 x i64> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect an unscaled `ldur` (#3) and `stur` (#4) that share the same
; base register and the same q data register.
404define void @fct12(i8* %str) nounwind ssp {
405entry:
406; CHECK-LABEL: fct12:
407; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
408; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
409  %p = getelementptr inbounds i8, i8* %str, i64 3
410  %q = bitcast i8* %p to <2 x i64>*
411  %0 = load <2 x i64>, <2 x i64>* %q, align 16
412  %p2 = getelementptr inbounds i8, i8* %str, i64 4
413  %q2 = bitcast i8* %p2 to <2 x i64>*
414  store <2 x i64> %0, <2 x i64>* %q2, align 16
415  ret void
416}
417
; Loads a <4 x i32> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one q data register.
418define void @fct13(i8* %str) nounwind ssp {
419entry:
420; CHECK-LABEL: fct13:
421; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
422; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
423  %p = getelementptr inbounds i8, i8* %str, i64 3
424  %q = bitcast i8* %p to <4 x i32>*
425  %0 = load <4 x i32>, <4 x i32>* %q, align 16
426  %p2 = getelementptr inbounds i8, i8* %str, i64 4
427  %q2 = bitcast i8* %p2 to <4 x i32>*
428  store <4 x i32> %0, <4 x i32>* %q2, align 16
429  ret void
430}
431
; Loads a <8 x i16> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one q data register.
432define void @fct14(i8* %str) nounwind ssp {
433entry:
434; CHECK-LABEL: fct14:
435; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
436; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
437  %p = getelementptr inbounds i8, i8* %str, i64 3
438  %q = bitcast i8* %p to <8 x i16>*
439  %0 = load <8 x i16>, <8 x i16>* %q, align 16
440  %p2 = getelementptr inbounds i8, i8* %str, i64 4
441  %q2 = bitcast i8* %p2 to <8 x i16>*
442  store <8 x i16> %0, <8 x i16>* %q2, align 16
443  ret void
444}
445
; Loads a <16 x i8> from byte offset 3 of %str and stores it at byte offset 4.
; The checks expect `ldur` (#3) and `stur` (#4) sharing one base register and
; one q data register.
446define void @fct15(i8* %str) nounwind ssp {
447entry:
448; CHECK-LABEL: fct15:
449; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
450; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
451  %p = getelementptr inbounds i8, i8* %str, i64 3
452  %q = bitcast i8* %p to <16 x i8>*
453  %0 = load <16 x i8>, <16 x i8>* %q, align 16
454  %p2 = getelementptr inbounds i8, i8* %str, i64 4
455  %q2 = bitcast i8* %p2 to <16 x i8>*
456  store <16 x i8> %0, <16 x i8>* %q2, align 16
457  ret void
458}
459
460; Check the building of vector from a single loaded value.
461; Part of <rdar://problem/14170854>
462;
463; Single loads with immediate offset.
; Loads the i8 at element 1 of %sp0, inserts it into lane 0 of an undef
; <8 x i8> and squares the vector. The checks expect the scalar to be loaded
; straight into a b register (`ldr b`, immediate #1), with the `mul.8b` of that
; register by itself as the very next instruction.
464define <8 x i8> @fct16(i8* nocapture %sp0) {
465; CHECK-LABEL: fct16:
466; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
467; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
468entry:
469  %addr = getelementptr i8, i8* %sp0, i64 1
470  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
471  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
472  %vmull.i = mul <8 x i8> %vec, %vec
473  ret <8 x i8> %vmull.i
474}
475
; Same as the <8 x i8> case but with a <16 x i8> result: the i8 at element 1 of
; %sp0 goes into lane 0 and the vector is squared. The checks expect `ldr b`
; with immediate #1 immediately followed by `mul.16b` of that register.
476define <16 x i8> @fct17(i8* nocapture %sp0) {
477; CHECK-LABEL: fct17:
478; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
479; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
480entry:
481  %addr = getelementptr i8, i8* %sp0, i64 1
482  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
483  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
484  %vmull.i = mul <16 x i8> %vec, %vec
485  ret <16 x i8> %vmull.i
486}
487
; Loads the i16 at element 1 of %sp0, inserts it into lane 0 of an undef
; <4 x i16> and squares the vector. The checks expect `ldr h` with immediate #2
; (1 * 2 bytes) immediately followed by `mul.4h` of that register.
488define <4 x i16> @fct18(i16* nocapture %sp0) {
489; CHECK-LABEL: fct18:
490; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
491; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
492entry:
493  %addr = getelementptr i16, i16* %sp0, i64 1
494  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
495  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
496  %vmull.i = mul <4 x i16> %vec, %vec
497  ret <4 x i16> %vmull.i
498}
499
; Loads the i16 at element 1 of %sp0, inserts it into lane 0 of an undef
; <8 x i16> and squares the vector. The checks expect `ldr h` with immediate #2
; immediately followed by `mul.8h` of that register.
500define <8 x i16> @fct19(i16* nocapture %sp0) {
501; CHECK-LABEL: fct19:
502; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
503; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
504entry:
505  %addr = getelementptr i16, i16* %sp0, i64 1
506  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
507  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
508  %vmull.i = mul <8 x i16> %vec, %vec
509  ret <8 x i16> %vmull.i
510}
511
; Loads the i32 at element 1 of %sp0, inserts it into lane 0 of an undef
; <2 x i32> and squares the vector. The checks expect `ldr s` with immediate #4
; (1 * 4 bytes) immediately followed by `mul.2s` of that register.
512define <2 x i32> @fct20(i32* nocapture %sp0) {
513; CHECK-LABEL: fct20:
514; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
515; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
516entry:
517  %addr = getelementptr i32, i32* %sp0, i64 1
518  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
519  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
520  %vmull.i = mul <2 x i32> %vec, %vec
521  ret <2 x i32> %vmull.i
522}
523
; Loads the i32 at element 1 of %sp0, inserts it into lane 0 of an undef
; <4 x i32> and squares the vector. The checks expect `ldr s` with immediate #4
; immediately followed by `mul.4s` of that register.
524define <4 x i32> @fct21(i32* nocapture %sp0) {
525; CHECK-LABEL: fct21:
526; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
527; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
528entry:
529  %addr = getelementptr i32, i32* %sp0, i64 1
530  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
531  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
532  %vmull.i = mul <4 x i32> %vec, %vec
533  ret <4 x i32> %vmull.i
534}
535
; Loads the i64 at element 1 of %sp0 and returns it as the only lane of a
; <1 x i64>. No arithmetic follows, so the check expects the value to be loaded
; directly into d0 with immediate #8 (1 * 8 bytes).
536define <1 x i64> @fct22(i64* nocapture %sp0) {
537; CHECK-LABEL: fct22:
538; CHECK: ldr d0, [x0, #8]
539entry:
540  %addr = getelementptr i64, i64* %sp0, i64 1
541  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
542  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
543   ret <1 x i64> %vec
544}
545
; Loads the i64 at element 1 of %sp0 and returns it in lane 0 of an otherwise
; undef <2 x i64>. The check expects a single `ldr d` with immediate #8; the
; destination register number is captured but not constrained.
546define <2 x i64> @fct23(i64* nocapture %sp0) {
547; CHECK-LABEL: fct23:
548; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
549entry:
550  %addr = getelementptr i64, i64* %sp0, i64 1
551  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
552  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
553  ret <2 x i64> %vec
554}
555
556;
557; Single loads with register offset.
; Loads the i8 at element %offset of %sp0, inserts it into lane 0 of an undef
; <8 x i8> and squares the vector. The checks expect `ldr b` with the unshifted
; register offset `[x0, x1]` immediately followed by `mul.8b` of that register.
558define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
559; CHECK-LABEL: fct24:
560; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
561; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
562entry:
563  %addr = getelementptr i8, i8* %sp0, i64 %offset
564  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
565  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
566  %vmull.i = mul <8 x i8> %vec, %vec
567  ret <8 x i8> %vmull.i
568}
569
; Loads the i8 at element %offset of %sp0, inserts it into lane 0 of an undef
; <16 x i8> and squares the vector. The checks expect `ldr b` with register
; offset `[x0, x1]` immediately followed by `mul.16b` of that register.
570define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
571; CHECK-LABEL: fct25:
572; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
573; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
574entry:
575  %addr = getelementptr i8, i8* %sp0, i64 %offset
576  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
577  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
578  %vmull.i = mul <16 x i8> %vec, %vec
579  ret <16 x i8> %vmull.i
580}
581
; Loads the i16 at element %offset of %sp0, inserts it into lane 0 of an undef
; <4 x i16> and squares the vector. The checks expect the index scaling to be
; folded into the load (`[x0, x1, lsl #1]`), immediately followed by `mul.4h`.
582define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
583; CHECK-LABEL: fct26:
584; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
585; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
586entry:
587  %addr = getelementptr i16, i16* %sp0, i64 %offset
588  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
589  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
590  %vmull.i = mul <4 x i16> %vec, %vec
591  ret <4 x i16> %vmull.i
592}
593
; Loads the i16 at element %offset of %sp0, inserts it into lane 0 of an undef
; <8 x i16> and squares the vector. The checks expect `ldr h` with
; `[x0, x1, lsl #1]` immediately followed by `mul.8h` of that register.
594define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
595; CHECK-LABEL: fct27:
596; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
597; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
598entry:
599  %addr = getelementptr i16, i16* %sp0, i64 %offset
600  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
601  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
602  %vmull.i = mul <8 x i16> %vec, %vec
603  ret <8 x i16> %vmull.i
604}
605
; Loads the i32 at element %offset of %sp0, inserts it into lane 0 of an undef
; <2 x i32> and squares the vector. The checks expect `ldr s` with
; `[x0, x1, lsl #2]` immediately followed by `mul.2s` of that register.
606define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
607; CHECK-LABEL: fct28:
608; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
609; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
610entry:
611  %addr = getelementptr i32, i32* %sp0, i64 %offset
612  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
613  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
614  %vmull.i = mul <2 x i32> %vec, %vec
615  ret <2 x i32> %vmull.i
616}
617
; Loads the i32 at element %offset of %sp0, inserts it into lane 0 of an undef
; <4 x i32> and squares the vector. The checks expect `ldr s` with
; `[x0, x1, lsl #2]` immediately followed by `mul.4s` of that register.
618define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
619; CHECK-LABEL: fct29:
620; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
621; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
622entry:
623  %addr = getelementptr i32, i32* %sp0, i64 %offset
624  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
625  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
626  %vmull.i = mul <4 x i32> %vec, %vec
627  ret <4 x i32> %vmull.i
628}
629
; Loads the i64 at element %offset of %sp0 and returns it as the only lane of a
; <1 x i64>. The check expects a direct load into d0 with the index scaling
; folded into the address (`[x0, x1, lsl #3]`).
630define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
631; CHECK-LABEL: fct30:
632; CHECK: ldr d0, [x0, x1, lsl #3]
633entry:
634  %addr = getelementptr i64, i64* %sp0, i64 %offset
635  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
636  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
637   ret <1 x i64> %vec
638}
639
; Loads the i64 at element %offset of %sp0 and returns it in lane 0 of an
; otherwise undef <2 x i64>. The check expects a direct load into d0 with
; `[x0, x1, lsl #3]`.
640define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
641; CHECK-LABEL: fct31:
642; CHECK: ldr d0, [x0, x1, lsl #3]
643entry:
644  %addr = getelementptr i64, i64* %sp0, i64 %offset
645  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
646  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
647  ret <2 x i64> %vec
648}
649