// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s

#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

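// Spot-checks the IR emitted for the SSE/SSE2 intrinsic wrappers exercised
// below: scalar rsqrt/rcp/sqrt, (un)aligned loads and stores, non-temporal
// stores, comparisons, 128-bit byte shifts, the "undefined" helpers, and a
// few MMX operations.
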
__m128 test_rsqrt_ss(__m128 x) {
  // CHECK: define {{.*}} @test_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

__m128 test_rcp_ss(__m128 x) {
  // CHECK: define {{.*}} @test_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

__m128 test_sqrt_ss(__m128 x) {
  // CHECK: define {{.*}} @test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}

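// The memory-access tests below verify that intrinsics with no alignment
// requirement (loadl_pi, load_ss, load1_pd, storeh_pd, ...) lower to align-1
// loads and stores, while the aligned variants (loadr_pd, storer_pd) keep
// their natural 16-byte alignment.
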
__m128 test_loadl_pi(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x, y);
}

__m128 test_loadh_pi(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x, y);
}

__m128 test_load_ss(void* y) {
  // CHECK: define {{.*}} @test_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  return _mm_load_ss(y);
}

__m128 test_load1_ps(void* y) {
  // CHECK: define {{.*}} @test_load1_ps
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  return _mm_load1_ps(y);
}

void test_store_ss(__m128 x, void* y) {
  // CHECK-LABEL: define void @test_store_ss
  // CHECK: store {{.*}} float* {{.*}}, align 1{{$}}
  _mm_store_ss(y, x);
}

__m128d test_load1_pd(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_load1_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_load1_pd(y);
}

__m128d test_loadr_pd(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* {{.*}}, align 16{{$}}
  return _mm_loadr_pd(y);
}

__m128d test_load_sd(void* y) {
  // CHECK: define {{.*}} @test_load_sd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_load_sd(y);
}

__m128d test_loadh_pd(__m128d x, void* y) {
  // CHECK: define {{.*}} @test_loadh_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_loadh_pd(x, y);
}

__m128d test_loadl_pd(__m128d x, void* y) {
  // CHECK: define {{.*}} @test_loadl_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_loadl_pd(x, y);
}

void test_store_sd(__m128d x, void* y) {
  // CHECK-LABEL: define void @test_store_sd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_store_sd(y, x);
}

void test_store1_pd(__m128d x, void* y) {
  // CHECK-LABEL: define void @test_store1_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_store1_pd(y, x);
}

void test_storer_pd(__m128d x, void* y) {
  // CHECK-LABEL: define void @test_storer_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(y, x);
}

void test_storeh_pd(__m128d x, void* y) {
  // CHECK-LABEL: define void @test_storeh_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_storeh_pd(y, x);
}

void test_storel_pd(__m128d x, void* y) {
  // CHECK-LABEL: define void @test_storel_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_storel_pd(y, x);
}

__m128i test_loadl_epi64(void* y) {
  // CHECK: define {{.*}} @test_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  return _mm_loadl_epi64(y);
}

void test_storel_epi64(__m128i x, void* y) {
  // CHECK-LABEL: define void @test_storel_epi64
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}

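// The streaming stores below must carry !nontemporal metadata; the scalar
// forms are unaligned (align 1) while the 128-bit form keeps 16-byte alignment.
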
void test_stream_si32(int x, void *y) {
  // CHECK-LABEL: define void @test_stream_si32
  // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
  _mm_stream_si32(y, x);
}

void test_stream_si64(long long x, void *y) {
  // CHECK-LABEL: define void @test_stream_si64
  // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
  _mm_stream_si64(y, x);
}

void test_stream_si128(__m128i x, void *y) {
  // CHECK-LABEL: define void @test_stream_si128
  // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
  _mm_stream_si128(y, x);
}

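// _mm_extract_epi16 masks its index to 3 bits, so the out-of-range index 8
// used below wraps around and extracts element 0.
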
void test_extract_epi16(__m128i __a) {
  // CHECK-LABEL: define void @test_extract_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
  _mm_extract_epi16(__a, 8);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

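// SSE/SSE2 have no GT/GE/NGT/NGE compare predicates: cmpgt, cmpge, cmpngt and
// cmpnge are lowered by swapping the operands and reusing the LT (1), LE (2),
// NLT (5) and NLE (6) immediates, which is why the tests below expect the same
// immediates as their lt/le counterparts. The same holds for the _ps, _sd and
// _pd variants further down.
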
__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnge_ps(__a, __b);
}

__m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpeq_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(__a, __b);
}

__m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmplt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(__a, __b);
}

__m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmple_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(__a, __b);
}

__m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpunord_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(__a, __b);
}

__m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpneq_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(__a, __b);
}

__m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(__a, __b);
}

__m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnle_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(__a, __b);
}

__m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpord_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(__a, __b);
}

__m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpgt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_sd(__a, __b);
}

__m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpge_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_sd(__a, __b);
}

__m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpngt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_sd(__a, __b);
}

__m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnge_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_sd(__a, __b);
}

__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpeq_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_pd(__a, __b);
}

__m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmplt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_pd(__a, __b);
}

__m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmple_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_pd(__a, __b);
}

__m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpunord_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_pd(__a, __b);
}

__m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpneq_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_pd(__a, __b);
}

__m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_pd(__a, __b);
}

__m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnle_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_pd(__a, __b);
}

__m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpord_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_pd(__a, __b);
}

__m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpgt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_pd(__a, __b);
}

__m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpge_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_pd(__a, __b);
}

__m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpngt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_pd(__a, __b);
}

__m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnge_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_pd(__a, __b);
}

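// The 128-bit byte-shift intrinsics below are lowered to a shufflevector that
// pulls the shifted-out lanes from a zero vector; the index masks encode the
// 5-byte shift (slli/bslli pick zeros for the low 5 bytes, srli/bsrli for the
// high 5 bytes).
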
__m128 test_mm_slli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(a, 5);
}

__m128 test_mm_bslli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(a, 5);
}

__m128 test_mm_srli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(a, 5);
}

__m128 test_mm_bsrli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(a, 5);
}

__m128 test_mm_undefined_ps() {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}

__m128d test_mm_undefined_pd() {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> undef
  return _mm_undefined_pd();
}

__m128i test_mm_undefined_si128() {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> undef
  return _mm_undefined_si128();
}

__m64 test_mm_add_si64(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_add_si64
  // CHECK: @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_add_si64(__a, __b);
}

__m64 test_mm_sub_si64(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_sub_si64
  // CHECK: @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_sub_si64(__a, __b);
}

__m64 test_mm_mul_su32(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_mul_su32
  // CHECK: @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_mul_su32(__a, __b);
}

void test_mm_pause() {
  // CHECK-LABEL: @test_mm_pause
  // CHECK: @llvm.x86.sse2.pause()
  _mm_pause();
}