1; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
2
; Nontemporal, align-1 store of a <4 x i64> value through %p.
; Expected output: lane 1 of v1 and of v0 is moved into separate d registers,
; then two stnp instructions write the d1 pair to [x0, #16] and the d0 pair
; to [x0], followed directly by ret.
3define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
4; CHECK-LABEL: test_stnp_v4i64:
5; CHECK-NEXT:  mov d[[HI1:[0-9]+]], v1[1]
6; CHECK-NEXT:  mov d[[HI0:[0-9]+]], v0[1]
7; CHECK-NEXT:  stnp d1, d[[HI1]], [x0, #16]
8; CHECK-NEXT:  stnp d0, d[[HI0]], [x0]
9; CHECK-NEXT:  ret
10  store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
11  ret void
12}
13
; Nontemporal, align-1 store of a <4 x i32> value through %p.
; Expected output: lane 1 of v0 is moved into a d register and a single stnp
; writes d0 plus that register to [x0].
14define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
15; CHECK-LABEL: test_stnp_v4i32:
16; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
17; CHECK-NEXT:  stnp d0, d[[HI]], [x0]
18; CHECK-NEXT:  ret
19  store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0
20  ret void
21}
22
; Nontemporal, align-1 store of a <8 x i16> value through %p.
; Expected output is the same d-register pair sequence as the other 128-bit
; cases: mov of v0 lane 1 into a d register, then stnp d0, dN to [x0].
23define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
24; CHECK-LABEL: test_stnp_v8i16:
25; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
26; CHECK-NEXT:  stnp d0, d[[HI]], [x0]
27; CHECK-NEXT:  ret
28  store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0
29  ret void
30}
31
; Nontemporal, align-1 store of a <16 x i8> value through %p.
; Expected output: mov of v0 lane 1 into a d register, then one stnp of
; d0 and that register to [x0].
32define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
33; CHECK-LABEL: test_stnp_v16i8:
34; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
35; CHECK-NEXT:  stnp d0, d[[HI]], [x0]
36; CHECK-NEXT:  ret
37  store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0
38  ret void
39}
40
; Nontemporal, align-1 store of a <2 x i32> value through %p.
; Expected output uses s registers: lane 1 of v0 is moved into an s register
; and stnp writes s0 plus that register to [x0].
41define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 {
42; CHECK-LABEL: test_stnp_v2i32:
43; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
44; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
45; CHECK-NEXT:  ret
46  store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0
47  ret void
48}
49
; Nontemporal, align-1 store of a <4 x i16> value through %p.
; Expected output: mov of v0 lane 1 into an s register, then stnp of s0 and
; that register to [x0].
50define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 {
51; CHECK-LABEL: test_stnp_v4i16:
52; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
53; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
54; CHECK-NEXT:  ret
55  store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0
56  ret void
57}
58
; Nontemporal, align-1 store of a <8 x i8> value through %p.
; Expected output: mov of v0 lane 1 into an s register, then stnp of s0 and
; that register to [x0].
59define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 {
60; CHECK-LABEL: test_stnp_v8i8:
61; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
62; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
63; CHECK-NEXT:  ret
64  store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0
65  ret void
66}
67
; Nontemporal, align-1 store of a <2 x double> value through %p.
; Expected output: mov of v0 lane 1 into a d register, then stnp of d0 and
; that register to [x0].
68define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 {
69; CHECK-LABEL: test_stnp_v2f64:
70; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
71; CHECK-NEXT:  stnp d0, d[[HI]], [x0]
72; CHECK-NEXT:  ret
73  store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0
74  ret void
75}
76
; Nontemporal, align-1 store of a <4 x float> value through %p.
; Expected output: mov of v0 lane 1 into a d register, then stnp of d0 and
; that register to [x0].
77define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 {
78; CHECK-LABEL: test_stnp_v4f32:
79; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
80; CHECK-NEXT:  stnp d0, d[[HI]], [x0]
81; CHECK-NEXT:  ret
82  store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0
83  ret void
84}
85
; Nontemporal, align-1 store of a <2 x float> value through %p.
; Expected output: mov of v0 lane 1 into an s register, then stnp of s0 and
; that register to [x0].
86define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 {
87; CHECK-LABEL: test_stnp_v2f32:
88; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
89; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
90; CHECK-NEXT:  ret
91  store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0
92  ret void
93}
94
; Nontemporal, align-1 store of a single-element <1 x double> vector.
; Even with one element, the expected output splits the value across two
; s registers (s0 and a mov of v0 lane 1) and stores them with stnp at [x0].
95define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 {
96; CHECK-LABEL: test_stnp_v1f64:
97; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
98; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
99; CHECK-NEXT:  ret
100  store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0
101  ret void
102}
103
; Nontemporal, align-1 store of a single-element <1 x i64> vector.
; The expected output treats the value as living in v0 and splits it across
; two s registers (s0 and a mov of v0 lane 1) stored with stnp at [x0].
104define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
105; CHECK-LABEL: test_stnp_v1i64:
106; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
107; CHECK-NEXT:  stnp s0, s[[HI]], [x0]
108; CHECK-NEXT:  ret
109  store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0
110  ret void
111}
112
; Nontemporal, align-1 store of a scalar i64 (passed in x1) through %p.
; Expected output: lsr by 32 produces the upper word in a scratch x register,
; then stnp writes w1 and the w view of that scratch register to [x0].
113define void @test_stnp_i64(i64* %p, i64 %v) #0 {
114; CHECK-LABEL: test_stnp_i64:
115; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
116; CHECK-NEXT:  stnp w1, w[[HI]], [x0]
117; CHECK-NEXT:  ret
118  store i64 %v, i64* %p, align 1, !nontemporal !0
119  ret void
120}
121
122
; Nontemporal store of a <2 x double> one element past %p (getelementptr
; index 1). The expected output folds that displacement into the stnp
; addressing mode as [x0, #16]; no separate address computation is expected.
123define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
124; CHECK-LABEL: test_stnp_v2f64_offset:
125; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
126; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #16]
127; CHECK-NEXT:  ret
128  %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1
129  store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
130  ret void
131}
132
; Nontemporal store of a <2 x double> one element before %p (getelementptr
; index -1). The expected output folds the negative displacement into the
; stnp addressing mode as [x0, #-16].
133define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
134; CHECK-LABEL: test_stnp_v2f64_offset_neg:
135; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
136; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #-16]
137; CHECK-NEXT:  ret
138  %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1
139  store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
140  ret void
141}
142
; Nontemporal store of a <2 x float> one element past %p (getelementptr
; index 1). The expected output folds the displacement into the s-register
; stnp as [x0, #8].
143define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
144; CHECK-LABEL: test_stnp_v2f32_offset:
145; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
146; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #8]
147; CHECK-NEXT:  ret
148  %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1
149  store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
150  ret void
151}
152
; Nontemporal store of a <2 x float> one element before %p (getelementptr
; index -1). The expected output folds the negative displacement into the
; s-register stnp as [x0, #-8].
153define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
154; CHECK-LABEL: test_stnp_v2f32_offset_neg:
155; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
156; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #-8]
157; CHECK-NEXT:  ret
158  %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1
159  store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
160  ret void
161}
162
; Nontemporal store of a scalar i64 one element past %p (getelementptr
; index 1). Expected output: lsr by 32 extracts the upper word of x1, and the
; w-register stnp carries the displacement as [x0, #8].
163define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
164; CHECK-LABEL: test_stnp_i64_offset:
165; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
166; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #8]
167; CHECK-NEXT:  ret
168  %tmp0 = getelementptr i64, i64* %p, i32 1
169  store i64 %v, i64* %tmp0, align 1, !nontemporal !0
170  ret void
171}
172
; Nontemporal store of a scalar i64 one element before %p (getelementptr
; index -1). Expected output: lsr by 32 extracts the upper word of x1, and
; the w-register stnp carries the displacement as [x0, #-8].
173define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
174; CHECK-LABEL: test_stnp_i64_offset_neg:
175; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
176; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #-8]
177; CHECK-NEXT:  ret
178  %tmp0 = getelementptr i64, i64* %p, i32 -1
179  store i64 %v, i64* %tmp0, align 1, !nontemporal !0
180  ret void
181}
182
; Nontemporal store of a <4 x float> at byte offset 4 from an i8* base.
; The test name marks 4 as an offset that cannot be folded; the expected
; output computes the address with a separate add and stores through it with
; no immediate. NOTE(review): presumably because 4 is not a multiple of the
; 8-byte d-register size - confirm against the STNP encoding.
183define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 {
184; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
185; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #4
186; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
187; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
188; CHECK-NEXT:  ret
189  %tmp0 = getelementptr i8, i8* %p, i32 4
190  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
191  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
192  ret void
193}
194
; Nontemporal store of a <4 x float> at byte offset -4 from an i8* base.
; The test name marks -4 as an offset that cannot be folded; the expected
; output computes the address with a separate sub and stores through it with
; no immediate. NOTE(review): presumably because -4 is not a multiple of the
; 8-byte d-register size - confirm against the STNP encoding.
195define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 {
196; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
197; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #4
198; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
199; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
200; CHECK-NEXT:  ret
201  %tmp0 = getelementptr i8, i8* %p, i32 -4
202  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
203  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
204  ret void
205}
206
; Nontemporal store of a <4 x float> at byte offset 512 from an i8* base.
; The test name marks 512 as an offset that cannot be folded; the expected
; output computes the address with a separate add and stores through it with
; no immediate. NOTE(review): presumably 512 is just above the largest
; positive d-register stnp immediate - confirm against the STNP encoding.
207define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 {
208; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
209; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #512
210; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
211; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
212; CHECK-NEXT:  ret
213  %tmp0 = getelementptr i8, i8* %p, i32 512
214  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
215  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
216  ret void
217}
218
; Nontemporal store of a <4 x float> at byte offset 504 from an i8* base.
; Here the expected output folds the displacement into the d-register stnp
; as [x0, #504], with no separate address computation.
219define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 {
220; CHECK-LABEL: test_stnp_v4f32_offset_504:
221; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
222; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #504]
223; CHECK-NEXT:  ret
224  %tmp0 = getelementptr i8, i8* %p, i32 504
225  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
226  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
227  ret void
228}
229
; Nontemporal store of a <4 x float> at byte offset 508 from an i8* base.
; The test name marks 508 as an offset that cannot be folded; the expected
; output computes the address with a separate add and stores through it with
; no immediate. NOTE(review): presumably because 508 is not a multiple of
; the 8-byte d-register size - confirm against the STNP encoding.
230define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 {
231; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
232; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #508
233; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
234; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
235; CHECK-NEXT:  ret
236  %tmp0 = getelementptr i8, i8* %p, i32 508
237  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
238  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
239  ret void
240}
241
; Nontemporal store of a <4 x float> at byte offset -520 from an i8* base.
; The test name marks -520 as an offset that cannot be folded; the expected
; output computes the address with a separate sub and stores through it with
; no immediate. NOTE(review): presumably -520 is just below the most
; negative d-register stnp immediate - confirm against the STNP encoding.
242define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 {
243; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
244; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #520
245; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
246; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
247; CHECK-NEXT:  ret
248  %tmp0 = getelementptr i8, i8* %p, i32 -520
249  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
250  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
251  ret void
252}
253
; Nontemporal store of a <4 x float> at byte offset -512 from an i8* base.
; Here the expected output folds the displacement into the d-register stnp
; as [x0, #-512], with no separate address computation.
254define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 {
255; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
256; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
257; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #-512]
258; CHECK-NEXT:  ret
259  %tmp0 = getelementptr i8, i8* %p, i32 -512
260  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
261  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
262  ret void
263}
264
265
; Nontemporal store of a <2 x float> at byte offset 256 from an i8* base.
; The test name marks 256 as an offset that cannot be folded; the expected
; output computes the address with a separate add and stores through it with
; no immediate. NOTE(review): presumably 256 is just above the largest
; positive s-register stnp immediate - confirm against the STNP encoding.
266define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 {
267; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
268; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #256
269; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
270; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
271; CHECK-NEXT:  ret
272  %tmp0 = getelementptr i8, i8* %p, i32 256
273  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
274  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
275  ret void
276}
277
; Nontemporal store of a <2 x float> at byte offset 252 from an i8* base.
; Here the expected output folds the displacement into the s-register stnp
; as [x0, #252], with no separate address computation.
278define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 {
279; CHECK-LABEL: test_stnp_v2f32_offset_252:
280; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
281; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #252]
282; CHECK-NEXT:  ret
283  %tmp0 = getelementptr i8, i8* %p, i32 252
284  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
285  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
286  ret void
287}
288
; Nontemporal store of a <2 x float> at byte offset -260 from an i8* base.
; The test name marks -260 as an offset that cannot be folded; the expected
; output computes the address with a separate sub and stores through it with
; no immediate. NOTE(review): presumably -260 is just below the most
; negative s-register stnp immediate - confirm against the STNP encoding.
289define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 {
290; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
291; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #260
292; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
293; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
294; CHECK-NEXT:  ret
295  %tmp0 = getelementptr i8, i8* %p, i32 -260
296  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
297  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
298  ret void
299}
300
; Nontemporal store of a <2 x float> at byte offset -256 from an i8* base.
; Here the expected output folds the displacement into the s-register stnp
; as [x0, #-256], with no separate address computation.
301define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 {
302; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
303; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
304; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #-256]
305; CHECK-NEXT:  ret
306  %tmp0 = getelementptr i8, i8* %p, i32 -256
307  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
308  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
309  ret void
310}
311
; External function with no body in this file. The alloca tests below pass
; their stack slot to it, so the slot's address is used by a call.
312declare void @dummy(<4 x float>*)
313
; Nontemporal store of a <4 x float> into a local alloca that is then passed
; to @dummy. The stnp pattern is not anchored to the line after the label, so
; any preceding instructions are left unconstrained. The store is expected to
; address the slot directly as [sp], immediately followed by materializing
; the slot address in x0 and the call.
314define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
315; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
316; CHECK:       stnp d0, d{{.*}}, [sp]
317; CHECK-NEXT:  mov x0, sp
318; CHECK-NEXT:  bl _dummy
319  %tmp0 = alloca <4 x float>
320  store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0
321  call void @dummy(<4 x float>* %tmp0)
322  ret void
323}
324
; Nontemporal store of a <4 x float> into element 1 of a two-element alloca;
; the base of the alloca (element 0) is what gets passed to @dummy.
; The stnp pattern is not anchored to the line after the label, so any
; preceding instructions are left unconstrained. The store is expected to
; fold the element displacement as [sp, #16], while the call argument is the
; unadjusted sp.
325define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
326; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
327; CHECK:       stnp d0, d{{.*}}, [sp, #16]
328; CHECK-NEXT:  mov x0, sp
329; CHECK-NEXT:  bl _dummy
330  %tmp0 = alloca <4 x float>, i32 2
331  %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1
332  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
333  call void @dummy(<4 x float>* %tmp0)
334  ret void
335}
336
; Metadata node referenced as "!nontemporal !0" by every store in this file.
337!0 = !{ i32 1 }
338
; Attribute group referenced as "#0" by every function definition in this
; file; it marks them nounwind.
339attributes #0 = { nounwind }
340