// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

// Basic packed/scalar arithmetic and bitwise-logic intrinsics.

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}

// Ordered comparison intrinsics (eq/ge/gt/le/lt/neq), packed and scalar forms.

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

// Negated (not-ge/gt/le/lt) and ordered/unordered comparison intrinsics.

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

// Ordered scalar compare-to-int (COMISS) intrinsics.

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

// Float <-> integer conversion intrinsics (rounding and truncating forms).

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvttss_si32(A);
}

long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i64
  return _mm_cvttss_si64(A);
}

// Division intrinsics plus the MXCSR field-reader helper macros.

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}

unsigned int test_MM_GET_EXCEPTION_MASK() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE() {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE() {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr() {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

// Load intrinsics: aligned, unaligned, scalar, splat, high/low halves, reversed.

__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

// Min/max, register-move, and sign-bit-extraction intrinsics.

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_move_ss(A, B);
}

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

// Multiply, bitwise-or, prefetch, and approximate reciprocal/rsqrt intrinsics.

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}

void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

// MXCSR field-writer helper macros plus the set_ps / set_ps1 constructors.

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -64
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

// Scalar/splat/reversed constructors, setcsr, store fence, and shuffle.

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}

__m128 test_mm_setzero_ps() {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}

void test_mm_sfence() {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

// Square-root, store, and non-temporal stream intrinsics.
// NOTE(review): test_mm_stream_ps previously declared its vector parameter as
// __m128d (an SSE2 double vector) and relied on clang's lax vector conversion
// when passing it to _mm_stream_ps, which takes __m128. Fixed to __m128 to
// match the intrinsic's signature and the companion fast-isel test.

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_sqrt_ss(__m128 x) {
  // CHECK: define {{.*}} @test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}

void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 1
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 0
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float *A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

// Subtraction intrinsics and the 4x4 matrix-transpose helper macro.

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

// Unordered scalar compares (UCOMISS), undefined value, unpack, and xor.

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

__m128 test_mm_undefined_ps() {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}