// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s


#include <x86intrin.h>

// Packed single-precision multiply-accumulate: expects a call to the generic
// @llvm.fma.v4f32 intrinsic.
__m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_macc_ps
  // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_macc_ps(a, b, c);
}

// Packed double-precision multiply-accumulate: expects a call to the generic
// @llvm.fma.v2f64 intrinsic.
__m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_macc_pd
  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_macc_pd(a, b, c);
}

// Scalar single-precision multiply-accumulate: expects element 0 of each
// operand to be extracted, fused with @llvm.fma.f32, and the result inserted
// into element 0 of a zeroinitializer vector.
__m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_macc_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
  return _mm_macc_ss(a, b, c);
}

// Scalar double-precision multiply-accumulate: expects element 0 of each
// operand to be extracted, fused with @llvm.fma.f64, and the result inserted
// into element 0 of a zeroinitializer vector.
__m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_macc_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
  return _mm_macc_sd(a, b, c);
}

// Packed single-precision multiply-subtract: expects an fneg whose result is
// passed as the third (addend) operand of @llvm.fma.v4f32.
__m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_msub_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
  return _mm_msub_ps(a, b, c);
}

// Packed double-precision multiply-subtract: expects an fneg whose result is
// passed as the third (addend) operand of @llvm.fma.v2f64.
__m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_msub_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
  return _mm_msub_pd(a, b, c);
}

// Scalar single-precision multiply-subtract: expects a vector fneg, with
// element 0 of the negated vector feeding the third operand of @llvm.fma.f32,
// and the result inserted into element 0 of a zeroinitializer vector.
__m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_msub_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
  // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
  return _mm_msub_ss(a, b, c);
}

// Scalar double-precision multiply-subtract: expects a vector fneg, with
// element 0 of the negated vector feeding the third operand of @llvm.fma.f64,
// and the result inserted into element 0 of a zeroinitializer vector.
__m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_msub_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
  // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
  return _mm_msub_sd(a, b, c);
}

// Packed single-precision negated multiply-accumulate: expects an fneg whose
// result is passed as the first operand of @llvm.fma.v4f32.
__m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_nmacc_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_nmacc_ps(a, b, c);
}

// Packed double-precision negated multiply-accumulate: expects an fneg whose
// result is passed as the first operand of @llvm.fma.v2f64.
__m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_nmacc_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_nmacc_pd(a, b, c);
}

// Scalar single-precision negated multiply-accumulate: expects a vector fneg,
// with element 0 of the negated vector feeding the first operand of
// @llvm.fma.f32, and the result inserted into element 0 of a zeroinitializer
// vector.
__m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_nmacc_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
  return _mm_nmacc_ss(a, b, c);
}

// Scalar double-precision negated multiply-accumulate: expects a vector fneg,
// with element 0 of the negated vector feeding the first operand of
// @llvm.fma.f64, and the result inserted into element 0 of a zeroinitializer
// vector.
__m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_nmacc_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
  return _mm_nmacc_sd(a, b, c);
}

// Packed single-precision negated multiply-subtract: expects two fneg
// instructions, the first feeding the first operand and the second feeding the
// third operand of @llvm.fma.v4f32.
__m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_nmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> [[NEG2]])
  return _mm_nmsub_ps(a, b, c);
}

// Packed double-precision negated multiply-subtract: expects two fneg
// instructions, the first feeding the first operand and the second feeding the
// third operand of @llvm.fma.v2f64.
__m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_nmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> [[NEG2]])
  return _mm_nmsub_pd(a, b, c);
}

// Scalar single-precision negated multiply-subtract: expects two vector fneg
// instructions; element 0 of the first feeds the first operand and element 0
// of the second feeds the third operand of @llvm.fma.f32. The result is
// inserted into element 0 of a zeroinitializer vector.
__m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_nmsub_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
  // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
  // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
  return _mm_nmsub_ss(a, b, c);
}

// Scalar double-precision negated multiply-subtract: expects two vector fneg
// instructions; element 0 of the first feeds the first operand and element 0
// of the second feeds the third operand of @llvm.fma.f64. The result is
// inserted into element 0 of a zeroinitializer vector.
__m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_nmsub_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
  // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
  // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
  return _mm_nmsub_sd(a, b, c);
}

// Packed single-precision alternating add/subtract: expects a call to the
// target-specific @llvm.x86.fma.vfmaddsub.ps intrinsic with no fneg emitted
// ahead of it.
__m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_maddsub_ps
  // CHECK-NOT: fneg
  // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_maddsub_ps(a, b, c);
}

// Packed double-precision alternating add/subtract: expects a call to the
// target-specific @llvm.x86.fma.vfmaddsub.pd intrinsic with no fneg emitted
// ahead of it.
__m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_maddsub_pd
  // CHECK-NOT: fneg
  // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_maddsub_pd(a, b, c);
}

// Packed single-precision alternating subtract/add: expects an fneg whose
// result is the third operand of @llvm.x86.fma.vfmaddsub.ps.
__m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_msubadd_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
  return _mm_msubadd_ps(a, b, c);
}

// Packed double-precision alternating subtract/add: expects an fneg whose
// result is the third operand of @llvm.x86.fma.vfmaddsub.pd.
__m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_msubadd_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
  return _mm_msubadd_pd(a, b, c);
}

// 256-bit packed single-precision multiply-accumulate: expects a call to the
// generic @llvm.fma.v8f32 intrinsic.
__m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_macc_ps
  // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_macc_ps(a, b, c);
}

// 256-bit packed double-precision multiply-accumulate: expects a call to the
// generic @llvm.fma.v4f64 intrinsic.
__m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_macc_pd
  // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_macc_pd(a, b, c);
}

// 256-bit packed single-precision multiply-subtract: expects an fneg whose
// result is passed as the third (addend) operand of @llvm.fma.v8f32.
__m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_msub_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
  return _mm256_msub_ps(a, b, c);
}

// 256-bit packed double-precision multiply-subtract: expects an fneg whose
// result is passed as the third (addend) operand of @llvm.fma.v4f64.
__m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_msub_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
  return _mm256_msub_pd(a, b, c);
}

// 256-bit packed single-precision negated multiply-accumulate: expects an fneg
// whose result is passed as the first operand of @llvm.fma.v8f32.
__m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_nmacc_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_nmacc_ps(a, b, c);
}

// 256-bit packed double-precision negated multiply-accumulate: expects an fneg
// whose result is passed as the first operand of @llvm.fma.v4f64.
__m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_nmacc_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_nmacc_pd(a, b, c);
}

// 256-bit packed single-precision negated multiply-subtract: expects two fneg
// instructions, the first feeding the first operand and the second feeding the
// third operand of @llvm.fma.v8f32.
__m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_nmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> [[NEG2]])
  return _mm256_nmsub_ps(a, b, c);
}

// 256-bit packed double-precision negated multiply-subtract: expects two fneg
// instructions, the first feeding the first operand and the second feeding the
// third operand of @llvm.fma.v4f64.
__m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_nmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> [[NEG2]])
  return _mm256_nmsub_pd(a, b, c);
}

// 256-bit packed single-precision alternating add/subtract: expects a call to
// @llvm.x86.fma.vfmaddsub.ps.256 with no fneg emitted ahead of it.
__m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_maddsub_ps
  // CHECK-NOT: fneg
  // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_maddsub_ps(a, b, c);
}

// 256-bit packed double-precision alternating add/subtract: expects a call to
// @llvm.x86.fma.vfmaddsub.pd.256 with no fneg emitted ahead of it.
__m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_maddsub_pd
  // CHECK-NOT: fneg
  // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_maddsub_pd(a, b, c);
}

// 256-bit packed single-precision alternating subtract/add: expects an fneg
// whose result is the third operand of @llvm.x86.fma.vfmaddsub.ps.256.
__m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_msubadd_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
  return _mm256_msubadd_ps(a, b, c);
}

// 256-bit packed double-precision alternating subtract/add: expects an fneg
// whose result is the third operand of @llvm.x86.fma.vfmaddsub.pd.256.
__m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_msubadd_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
  return _mm256_msubadd_pd(a, b, c);
}
