1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2
; Loads one <2 x float> operand from each of %A and %B, feeds both to the
; llvm.aarch64.neon.frecps.v2f32 intrinsic and returns its result. The
; FileCheck pattern below requires a frecps.2s instruction in the output.
define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frecps_2s:
;CHECK: frecps.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %step = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %step
}
11
; <4 x float> variant: both operands are loaded through the pointer
; arguments and handed to llvm.aarch64.neon.frecps.v4f32. The pattern below
; requires a frecps.4s instruction in the generated assembly.
define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frecps_4s:
;CHECK: frecps.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %step = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %step
}
20
; <2 x double> variant: loads the two operands from %A and %B and calls
; llvm.aarch64.neon.frecps.v2f64 on them. The pattern below requires a
; frecps.2d instruction in the generated assembly.
define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frecps_2d:
;CHECK: frecps.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %rhs = load <2 x double>, <2 x double>* %B
  %step = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %step
}
29
30declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
31declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
32declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
33
34
; Loads one <2 x float> operand from each pointer argument and passes the
; pair to llvm.aarch64.neon.frsqrts.v2f32, returning the intrinsic's value.
; The pattern below requires a frsqrts.2s instruction in the output.
define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_2s:
;CHECK: frsqrts.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %step = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %step
}
43
; <4 x float> variant: the operands loaded from %A and %B are the two
; arguments of llvm.aarch64.neon.frsqrts.v4f32. The pattern below requires a
; frsqrts.4s instruction in the generated assembly.
define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_4s:
;CHECK: frsqrts.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %step = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %step
}
52
; <2 x double> variant: loads both operands and calls
; llvm.aarch64.neon.frsqrts.v2f64 with them. The pattern below requires a
; frsqrts.2d instruction in the generated assembly.
define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frsqrts_2d:
;CHECK: frsqrts.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %rhs = load <2 x double>, <2 x double>* %B
  %step = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %step
}
61
62declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
63declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
64declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
65
; Loads a single <2 x float> value from %A and passes it to the one-operand
; intrinsic llvm.aarch64.neon.frecpe.v2f32. The pattern below requires a
; frecpe.2s instruction in the generated assembly.
define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_2s:
;CHECK: frecpe.2s
  %src = load <2 x float>, <2 x float>* %A
  %est = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %src)
  ret <2 x float> %est
}
73
; <4 x float> variant: the value loaded from %A is the sole argument of
; llvm.aarch64.neon.frecpe.v4f32. The pattern below requires a frecpe.4s
; instruction in the generated assembly.
define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_4s:
;CHECK: frecpe.4s
  %src = load <4 x float>, <4 x float>* %A
  %est = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %src)
  ret <4 x float> %est
}
81
; <2 x double> variant: loads the operand from %A and calls
; llvm.aarch64.neon.frecpe.v2f64 on it. The pattern below requires a
; frecpe.2d instruction in the generated assembly.
define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frecpe_2d:
;CHECK: frecpe.2d
  %src = load <2 x double>, <2 x double>* %A
  %est = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %src)
  ret <2 x double> %est
}
89
; Scalar float form: loads one float from %A and calls
; llvm.aarch64.neon.frecpe.f32. The pattern below requires a frecpe that
; writes s0 and reads some s-register.
define float @frecpe_s(float* %A) nounwind {
;CHECK-LABEL: frecpe_s:
;CHECK: frecpe s0, {{s[0-9]+}}
  %src = load float, float* %A
  %est = call float @llvm.aarch64.neon.frecpe.f32(float %src)
  ret float %est
}
97
; Scalar double form: loads one double from %A and calls
; llvm.aarch64.neon.frecpe.f64. The pattern below requires a frecpe that
; writes d0 and reads some d-register.
define double @frecpe_d(double* %A) nounwind {
;CHECK-LABEL: frecpe_d:
;CHECK: frecpe d0, {{d[0-9]+}}
  %src = load double, double* %A
  %est = call double @llvm.aarch64.neon.frecpe.f64(double %src)
  ret double %est
}
105
106declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
107declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
108declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
109declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
110declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
111
; Scalar float form: the float loaded from %A is the only argument of
; llvm.aarch64.neon.frecpx.f32. The pattern below requires a frecpx that
; writes s0 and reads some s-register.
define float @frecpx_s(float* %A) nounwind {
;CHECK-LABEL: frecpx_s:
;CHECK: frecpx s0, {{s[0-9]+}}
  %src = load float, float* %A
  %res = call float @llvm.aarch64.neon.frecpx.f32(float %src)
  ret float %res
}
119
; Scalar double form: the double loaded from %A is the only argument of
; llvm.aarch64.neon.frecpx.f64. The pattern below requires a frecpx that
; writes d0 and reads some d-register.
define double @frecpx_d(double* %A) nounwind {
;CHECK-LABEL: frecpx_d:
;CHECK: frecpx d0, {{d[0-9]+}}
  %src = load double, double* %A
  %res = call double @llvm.aarch64.neon.frecpx.f64(double %src)
  ret double %res
}
127
128declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
129declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
130
; Loads a single <2 x float> value from %A and passes it to
; llvm.aarch64.neon.frsqrte.v2f32. The pattern below requires a frsqrte.2s
; instruction in the generated assembly.
define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_2s:
;CHECK: frsqrte.2s
  %src = load <2 x float>, <2 x float>* %A
  %est = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %src)
  ret <2 x float> %est
}
138
; <4 x float> variant: the value loaded from %A is the sole argument of
; llvm.aarch64.neon.frsqrte.v4f32. The pattern below requires a frsqrte.4s
; instruction in the generated assembly.
define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_4s:
;CHECK: frsqrte.4s
  %src = load <4 x float>, <4 x float>* %A
  %est = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %src)
  ret <4 x float> %est
}
146
; <2 x double> variant: loads the operand from %A and calls
; llvm.aarch64.neon.frsqrte.v2f64 on it. The pattern below requires a
; frsqrte.2d instruction in the generated assembly.
define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frsqrte_2d:
;CHECK: frsqrte.2d
  %src = load <2 x double>, <2 x double>* %A
  %est = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %src)
  ret <2 x double> %est
}
154
; Scalar float form: loads one float from %A and calls
; llvm.aarch64.neon.frsqrte.f32. The pattern below requires a frsqrte that
; writes s0 and reads some s-register.
define float @frsqrte_s(float* %A) nounwind {
;CHECK-LABEL: frsqrte_s:
;CHECK: frsqrte s0, {{s[0-9]+}}
  %src = load float, float* %A
  %est = call float @llvm.aarch64.neon.frsqrte.f32(float %src)
  ret float %est
}
162
; Scalar double form: loads one double from %A and calls
; llvm.aarch64.neon.frsqrte.f64. The pattern below requires a frsqrte that
; writes d0 and reads some d-register.
define double @frsqrte_d(double* %A) nounwind {
;CHECK-LABEL: frsqrte_d:
;CHECK: frsqrte d0, {{d[0-9]+}}
  %src = load double, double* %A
  %est = call double @llvm.aarch64.neon.frsqrte.f64(double %src)
  ret double %est
}
170
171declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
172declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
173declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
174declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
175declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
176
; Integer vector form: loads a <2 x i32> value from %A and passes it to
; llvm.aarch64.neon.urecpe.v2i32. The pattern below requires a urecpe.2s
; instruction in the generated assembly.
define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_2s:
;CHECK: urecpe.2s
  %src = load <2 x i32>, <2 x i32>* %A
  %est = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %src)
  ret <2 x i32> %est
}
184
; <4 x i32> variant: the value loaded from %A is the sole argument of
; llvm.aarch64.neon.urecpe.v4i32. The pattern below requires a urecpe.4s
; instruction in the generated assembly.
define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_4s:
;CHECK: urecpe.4s
  %src = load <4 x i32>, <4 x i32>* %A
  %est = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %src)
  ret <4 x i32> %est
}
192
193declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
194declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
195
; Integer vector form: loads a <2 x i32> value from %A and passes it to
; llvm.aarch64.neon.ursqrte.v2i32. The pattern below requires a ursqrte.2s
; instruction in the generated assembly.
define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_2s:
;CHECK: ursqrte.2s
  %src = load <2 x i32>, <2 x i32>* %A
  %est = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %src)
  ret <2 x i32> %est
}
203
; <4 x i32> variant: the value loaded from %A is the sole argument of
; llvm.aarch64.neon.ursqrte.v4i32. The pattern below requires a ursqrte.4s
; instruction in the generated assembly.
define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_4s:
;CHECK: ursqrte.4s
  %src = load <4 x i32>, <4 x i32>* %A
  %est = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %src)
  ret <4 x i32> %est
}
211
212declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
213declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
214
; Scalar float form taking its operands by value: %a and %b go directly to
; llvm.aarch64.neon.frsqrts.f32 as a tail call, with no loads involved. The
; patterns below require `frsqrts s0, s0, s1` with `ret` on the next line.
define float @f1(float %a, float %b) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK: frsqrts s0, s0, s1
; CHECK-NEXT: ret
  %step = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
  ret float %step
}
222
; Scalar double form taking its operands by value: %a and %b go directly to
; llvm.aarch64.neon.frsqrts.f64 as a tail call, with no loads involved. The
; patterns below require `frsqrts d0, d0, d1` with `ret` on the next line.
define double @f2(double %a, double %b) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK: frsqrts d0, d0, d1
; CHECK-NEXT: ret
  %step = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
  ret double %step
}
230
231declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone
232declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone
233