; RUN: llc < %s | FileCheck %s
;
; Codegen test for vector loads through a pointer loaded from memory, with and
; without a following pointer increment. The expected output in each function
; is a NEON vld1 form for the thumbv7s triple declared below.

target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"
5
; Loads a vector pointer from %ptr, then does an align-1 load of <8 x i8>
; through it. Expected: vld1.8 into a single d register.
define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i8>*, <8 x i8>** %ptr
  %lA = load <8 x i8>, <8 x i8>* %A, align 1
  ret <8 x i8> %lA
}
13
; Align-1 load of <8 x i8>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 with the `!` (base-register writeback)
; addressing form.
define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i8>*, <8 x i8>** %ptr
  %lA = load <8 x i8>, <8 x i8>* %A, align 1
  ; Step the pointer by exactly one <8 x i8> element.
  %inc = getelementptr <8 x i8>, <8 x i8>* %A, i32 1
  store <8 x i8>* %inc, <8 x i8>** %ptr
  ret <8 x i8> %lA
}
23
; Align-1 load of <4 x i16> through a pointer loaded from %ptr. Because the
; load is only byte-aligned, the expected instruction is vld1.8 (byte element
; size) into a single d register.
define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i16>*, <4 x i16>** %ptr
  %lA = load <4 x i16>, <4 x i16>* %A, align 1
  ret <4 x i16> %lA
}
31
; Align-1 load of <4 x i16>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 with the `!` (base-register writeback)
; addressing form.
define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i16>*, <4 x i16>** %ptr
  %lA = load <4 x i16>, <4 x i16>* %A, align 1
  ; Step the pointer by exactly one <4 x i16> element.
  %inc = getelementptr <4 x i16>, <4 x i16>* %A, i32 1
  store <4 x i16>* %inc, <4 x i16>** %ptr
  ret <4 x i16> %lA
}
41
; Align-1 load of <2 x i32> through a pointer loaded from %ptr.
; Expected: vld1.8 into a single d register.
define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i32>*, <2 x i32>** %ptr
  %lA = load <2 x i32>, <2 x i32>* %A, align 1
  ret <2 x i32> %lA
}
49
; Align-1 load of <2 x i32>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 with the `!` (base-register writeback)
; addressing form.
define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i32>*, <2 x i32>** %ptr
  %lA = load <2 x i32>, <2 x i32>* %A, align 1
  ; Step the pointer by exactly one <2 x i32> element.
  %inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
  store <2 x i32>* %inc, <2 x i32>** %ptr
  ret <2 x i32> %lA
}
59
; Align-1 load of <2 x float> through a pointer loaded from %ptr.
; Expected: vld1.8 into a single d register.
define <2 x float> @load_v2f32(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x float>*, <2 x float>** %ptr
  %lA = load <2 x float>, <2 x float>* %A, align 1
  ret <2 x float> %lA
}
67
; Align-1 load of <2 x float>, after which the pointer advanced by one vector
; is stored back to %ptr. Expected: vld1.8 with the `!` (base-register
; writeback) addressing form.
define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x float>*, <2 x float>** %ptr
  %lA = load <2 x float>, <2 x float>* %A, align 1
  ; Step the pointer by exactly one <2 x float> element.
  %inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
  store <2 x float>* %inc, <2 x float>** %ptr
  ret <2 x float> %lA
}
77
; Align-1 load of <1 x i64> through a pointer loaded from %ptr.
; Expected: vld1.8 into a single d register.
define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <1 x i64>*, <1 x i64>** %ptr
  %lA = load <1 x i64>, <1 x i64>* %A, align 1
  ret <1 x i64> %lA
}
85
; Align-1 load of <1 x i64>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 with the `!` (base-register writeback)
; addressing form.
define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <1 x i64>*, <1 x i64>** %ptr
  %lA = load <1 x i64>, <1 x i64>* %A, align 1
  ; Step the pointer by exactly one <1 x i64> element.
  %inc = getelementptr <1 x i64>, <1 x i64>* %A, i32 1
  store <1 x i64>* %inc, <1 x i64>** %ptr
  ret <1 x i64> %lA
}
95
; Align-1 load of the 128-bit vector <16 x i8> through a pointer loaded from
; %ptr. Expected: vld1.8 into a pair of d registers.
define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <16 x i8>*, <16 x i8>** %ptr
  %lA = load <16 x i8>, <16 x i8>* %A, align 1
  ret <16 x i8> %lA
}
103
; Align-1 load of <16 x i8>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 into a pair of d registers with the
; `!` (base-register writeback) addressing form.
define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <16 x i8>*, <16 x i8>** %ptr
  %lA = load <16 x i8>, <16 x i8>* %A, align 1
  ; Step the pointer by exactly one <16 x i8> element.
  %inc = getelementptr <16 x i8>, <16 x i8>* %A, i32 1
  store <16 x i8>* %inc, <16 x i8>** %ptr
  ret <16 x i8> %lA
}
113
; Align-1 load of the 128-bit vector <8 x i16> through a pointer loaded from
; %ptr. Expected: vld1.8 into a pair of d registers.
define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i16>*, <8 x i16>** %ptr
  %lA = load <8 x i16>, <8 x i16>* %A, align 1
  ret <8 x i16> %lA
}
121
; Align-1 load of <8 x i16>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 into a pair of d registers with the
; `!` (base-register writeback) addressing form.
define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i16>*, <8 x i16>** %ptr
  %lA = load <8 x i16>, <8 x i16>* %A, align 1
  ; Step the pointer by exactly one <8 x i16> element.
  %inc = getelementptr <8 x i16>, <8 x i16>* %A, i32 1
  store <8 x i16>* %inc, <8 x i16>** %ptr
  ret <8 x i16> %lA
}
131
; Align-1 load of the 128-bit vector <4 x i32> through a pointer loaded from
; %ptr. Expected: vld1.8 into a pair of d registers.
define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i32>*, <4 x i32>** %ptr
  %lA = load <4 x i32>, <4 x i32>* %A, align 1
  ret <4 x i32> %lA
}
139
; Align-1 load of <4 x i32>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 into a pair of d registers with the
; `!` (base-register writeback) addressing form.
define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i32>*, <4 x i32>** %ptr
  %lA = load <4 x i32>, <4 x i32>* %A, align 1
  ; Step the pointer by exactly one <4 x i32> element.
  %inc = getelementptr <4 x i32>, <4 x i32>* %A, i32 1
  store <4 x i32>* %inc, <4 x i32>** %ptr
  ret <4 x i32> %lA
}
149
; Align-1 load of the 128-bit vector <4 x float> through a pointer loaded from
; %ptr. Expected: vld1.8 into a pair of d registers.
define <4 x float> @load_v4f32(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x float>*, <4 x float>** %ptr
  %lA = load <4 x float>, <4 x float>* %A, align 1
  ret <4 x float> %lA
}
157
; Align-1 load of <4 x float>, after which the pointer advanced by one vector
; is stored back to %ptr. Expected: vld1.8 into a pair of d registers with the
; `!` (base-register writeback) addressing form.
define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x float>*, <4 x float>** %ptr
  %lA = load <4 x float>, <4 x float>* %A, align 1
  ; Step the pointer by exactly one <4 x float> element.
  %inc = getelementptr <4 x float>, <4 x float>* %A, i32 1
  store <4 x float>* %inc, <4 x float>** %ptr
  ret <4 x float> %lA
}
167
; Align-1 load of the 128-bit vector <2 x i64> through a pointer loaded from
; %ptr. Expected: vld1.8 into a pair of d registers.
define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 1
  ret <2 x i64> %lA
}
175
; Align-1 load of <2 x i64>, after which the pointer advanced by one vector is
; stored back to %ptr. Expected: vld1.8 into a pair of d registers with the
; `!` (base-register writeback) addressing form.
define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 1
  ; Step the pointer by exactly one <2 x i64> element.
  %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
185
; Make sure we change the type to match alignment if necessary.
;
; Here the <2 x i64> load is 2-byte aligned, so the expected instruction uses
; 16-bit elements (vld1.16), still with the `!` writeback addressing form.
define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned2:
;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 2
  ; Step the pointer by exactly one <2 x i64> element.
  %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
196
; <2 x i64> load with 4-byte alignment followed by a one-vector pointer
; increment. Expected: 32-bit elements (vld1.32) with the `!` writeback
; addressing form.
define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned4:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 4
  ; Step the pointer by exactly one <2 x i64> element.
  %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
206
; <2 x i64> load with 8-byte alignment followed by a one-vector pointer
; increment. Expected: 64-bit elements (vld1.64) with the `!` writeback
; addressing form.
define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned8:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 8
  ; Step the pointer by exactly one <2 x i64> element.
  %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
216
; <2 x i64> load with 16-byte alignment followed by a one-vector pointer
; increment. Expected: vld1.64 carrying the `:128` address alignment
; qualifier, with the `!` writeback addressing form.
define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned16:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
  %A = load <2 x i64>*, <2 x i64>** %ptr
  %lA = load <2 x i64>, <2 x i64>* %A, align 16
  ; Step the pointer by exactly one <2 x i64> element.
  %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
226
; Make sure we don't break smaller-than-dreg extloads.
;
; Note: despite the "v8i8tov8i32" in the name, the IR loads <4 x i8> (32 bits,
; narrower than a d register) and zero-extends it to <4 x i32>. Expected: a
; single-lane 32-bit vld1 followed by two widening vmovl steps (u8, then u16).
define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32:
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
  %A = load <4 x i8>*, <4 x i8>** %ptr
  %lA = load <4 x i8>, <4 x i8>* %A, align 4
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}
238
; Same <4 x i8> -> <4 x i32> zero-extending load as a plain extload, but the
; pointer is also advanced by 4 elements of <4 x i8> (the #16 in the expected
; add.w) and stored back to %ptr. The expected output keeps the pointer update
; as a separate add.w/str pair around the single-lane vld1.32, rather than
; using a writeback load.
define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr   r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: add.w   r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: str   r[[INCREG]], [r0]
  %A = load <4 x i8>*, <4 x i8>** %ptr
  %lA = load <4 x i8>, <4 x i8>* %A, align 4
  ; Index 4 (not 1): the increment deliberately differs from the load size.
  %inc = getelementptr <4 x i8>, <4 x i8>* %A, i32 4
  store <4 x i8>* %inc, <4 x i8>** %ptr
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}
254