1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse41-builtins.c
6
; Constant-mask blend on the <8 x i16> views of the arguments: the shufflevector
; mask takes result lanes 1, 3 and 5 from %a1 and every other lane from %a0.
; Both runs are expected to select a single pblendw.
7define <2 x i64> @test_mm_blend_epi16(<2 x i64> %a0, <2 x i64> %a1) {
8; X32-LABEL: test_mm_blend_epi16:
9; X32:       # BB#0:
10; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
11; X32-NEXT:    retl
12;
13; X64-LABEL: test_mm_blend_epi16:
14; X64:       # BB#0:
15; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
16; X64-NEXT:    retq
17  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
18  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
19  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
20  %res = bitcast <8 x i16> %shuf to <2 x i64>
21  ret <2 x i64> %res
22}
23
; Constant-mask blend of two <2 x double> values: lane 0 comes from %a0 and
; lane 1 from %a1. Both runs are expected to select blendpd.
24define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
25; X32-LABEL: test_mm_blend_pd:
26; X32:       # BB#0:
27; X32-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
28; X32-NEXT:    retl
29;
30; X64-LABEL: test_mm_blend_pd:
31; X64:       # BB#0:
32; X64-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
33; X64-NEXT:    retq
34  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
35  ret <2 x double> %res
36}
37
; Constant-mask blend of two <4 x float> values: lanes 1 and 2 come from %a1,
; lanes 0 and 3 from %a0. Both runs are expected to select blendps.
38define <4 x float> @test_mm_blend_ps(<4 x float> %a0, <4 x float> %a1) {
39; X32-LABEL: test_mm_blend_ps:
40; X32:       # BB#0:
41; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
42; X32-NEXT:    retl
43;
44; X64-LABEL: test_mm_blend_ps:
45; X64:       # BB#0:
46; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
47; X64-NEXT:    retq
48  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
49  ret <4 x float> %res
50}
51
; Variable byte blend through the llvm.x86.sse41.pblendvb intrinsic, applied to
; <16 x i8> bitcasts of the three arguments. The checks expect xmm2 to be
; copied into xmm0 ahead of pblendvb and the result to be copied from xmm3
; back into xmm0.
52define <2 x i64> @test_mm_blendv_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
53; X32-LABEL: test_mm_blendv_epi8:
54; X32:       # BB#0:
55; X32-NEXT:    movdqa %xmm0, %xmm3
56; X32-NEXT:    movaps %xmm2, %xmm0
57; X32-NEXT:    pblendvb %xmm1, %xmm3
58; X32-NEXT:    movdqa %xmm3, %xmm0
59; X32-NEXT:    retl
60;
61; X64-LABEL: test_mm_blendv_epi8:
62; X64:       # BB#0:
63; X64-NEXT:    movdqa %xmm0, %xmm3
64; X64-NEXT:    movaps %xmm2, %xmm0
65; X64-NEXT:    pblendvb %xmm1, %xmm3
66; X64-NEXT:    movdqa %xmm3, %xmm0
67; X64-NEXT:    retq
68  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
69  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
70  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
71  %call = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
72  %res = bitcast <16 x i8> %call to <2 x i64>
73  ret <2 x i64> %res
74}
75declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
76
; Variable blend of <2 x double> values through the llvm.x86.sse41.blendvpd
; intrinsic. The checks expect xmm2 to be copied into xmm0 ahead of blendvpd
; and the result to be copied from xmm3 back into xmm0.
77define <2 x double> @test_mm_blendv_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
78; X32-LABEL: test_mm_blendv_pd:
79; X32:       # BB#0:
80; X32-NEXT:    movapd %xmm0, %xmm3
81; X32-NEXT:    movaps %xmm2, %xmm0
82; X32-NEXT:    blendvpd %xmm1, %xmm3
83; X32-NEXT:    movapd %xmm3, %xmm0
84; X32-NEXT:    retl
85;
86; X64-LABEL: test_mm_blendv_pd:
87; X64:       # BB#0:
88; X64-NEXT:    movapd %xmm0, %xmm3
89; X64-NEXT:    movaps %xmm2, %xmm0
90; X64-NEXT:    blendvpd %xmm1, %xmm3
91; X64-NEXT:    movapd %xmm3, %xmm0
92; X64-NEXT:    retq
93  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
94  ret <2 x double> %res
95}
96declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
97
; Variable blend of <4 x float> values through the llvm.x86.sse41.blendvps
; intrinsic. The checks expect xmm2 to be copied into xmm0 ahead of blendvps
; and the result to be copied from xmm3 back into xmm0.
98define <4 x float> @test_mm_blendv_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
99; X32-LABEL: test_mm_blendv_ps:
100; X32:       # BB#0:
101; X32-NEXT:    movaps %xmm0, %xmm3
102; X32-NEXT:    movaps %xmm2, %xmm0
103; X32-NEXT:    blendvps %xmm1, %xmm3
104; X32-NEXT:    movaps %xmm3, %xmm0
105; X32-NEXT:    retl
106;
107; X64-LABEL: test_mm_blendv_ps:
108; X64:       # BB#0:
109; X64-NEXT:    movaps %xmm0, %xmm3
110; X64-NEXT:    movaps %xmm2, %xmm0
111; X64-NEXT:    blendvps %xmm1, %xmm3
112; X64-NEXT:    movaps %xmm3, %xmm0
113; X64-NEXT:    retq
114  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
115  ret <4 x float> %res
116}
117declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
118
; Calls llvm.x86.sse41.round.pd with immediate 2; both runs are expected to
; select roundpd $2 operating in place on xmm0.
119define <2 x double> @test_mm_ceil_pd(<2 x double> %a0) {
120; X32-LABEL: test_mm_ceil_pd:
121; X32:       # BB#0:
122; X32-NEXT:    roundpd $2, %xmm0, %xmm0
123; X32-NEXT:    retl
124;
125; X64-LABEL: test_mm_ceil_pd:
126; X64:       # BB#0:
127; X64-NEXT:    roundpd $2, %xmm0, %xmm0
128; X64-NEXT:    retq
129  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 2)
130  ret <2 x double> %res
131}
132declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
133
; Calls llvm.x86.sse41.round.ps with immediate 2; both runs are expected to
; select roundps $2 operating in place on xmm0.
134define <4 x float> @test_mm_ceil_ps(<4 x float> %a0) {
135; X32-LABEL: test_mm_ceil_ps:
136; X32:       # BB#0:
137; X32-NEXT:    roundps $2, %xmm0, %xmm0
138; X32-NEXT:    retl
139;
140; X64-LABEL: test_mm_ceil_ps:
141; X64:       # BB#0:
142; X64-NEXT:    roundps $2, %xmm0, %xmm0
143; X64-NEXT:    retq
144  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 2)
145  ret <4 x float> %res
146}
147declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
148
; Calls llvm.x86.sse41.round.sd on (%a0, %a1) with immediate 2; both runs are
; expected to select roundsd $2 with xmm1 as source and xmm0 as destination.
149define <2 x double> @test_mm_ceil_sd(<2 x double> %a0, <2 x double> %a1) {
150; X32-LABEL: test_mm_ceil_sd:
151; X32:       # BB#0:
152; X32-NEXT:    roundsd $2, %xmm1, %xmm0
153; X32-NEXT:    retl
154;
155; X64-LABEL: test_mm_ceil_sd:
156; X64:       # BB#0:
157; X64-NEXT:    roundsd $2, %xmm1, %xmm0
158; X64-NEXT:    retq
159  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 2)
160  ret <2 x double> %res
161}
162declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
163
; Calls llvm.x86.sse41.round.ss on (%a0, %a1) with immediate 2; both runs are
; expected to select roundss $2 with xmm1 as source and xmm0 as destination.
164define <4 x float> @test_mm_ceil_ss(<4 x float> %a0, <4 x float> %a1) {
165; X32-LABEL: test_mm_ceil_ss:
166; X32:       # BB#0:
167; X32-NEXT:    roundss $2, %xmm1, %xmm0
168; X32-NEXT:    retl
169;
170; X64-LABEL: test_mm_ceil_ss:
171; X64:       # BB#0:
172; X64-NEXT:    roundss $2, %xmm1, %xmm0
173; X64-NEXT:    retq
174  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 2)
175  ret <4 x float> %res
176}
177declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
178
; Lane-wise 64-bit equality written as generic IR: icmp eq followed by a sext
; of the <2 x i1> result back to <2 x i64>. Both runs are expected to select
; pcmpeqq.
179define <2 x i64> @test_mm_cmpeq_epi64(<2 x i64> %a0, <2 x i64> %a1) {
180; X32-LABEL: test_mm_cmpeq_epi64:
181; X32:       # BB#0:
182; X32-NEXT:    pcmpeqq %xmm1, %xmm0
183; X32-NEXT:    retl
184;
185; X64-LABEL: test_mm_cmpeq_epi64:
186; X64:       # BB#0:
187; X64-NEXT:    pcmpeqq %xmm1, %xmm0
188; X64-NEXT:    retq
189  %cmp = icmp eq <2 x i64> %a0, %a1
190  %res = sext <2 x i1> %cmp to <2 x i64>
191  ret <2 x i64> %res
192}
193
; Sign-extends the low 8 bytes of the argument to <8 x i16>: the shufflevector
; selects lanes 0-7 of the <16 x i8> view and sext widens them. Both runs are
; expected to select pmovsxbw.
194define <2 x i64> @test_mm_cvtepi8_epi16(<2 x i64> %a0) {
195; X32-LABEL: test_mm_cvtepi8_epi16:
196; X32:       # BB#0:
197; X32-NEXT:    pmovsxbw %xmm0, %xmm0
198; X32-NEXT:    retl
199;
200; X64-LABEL: test_mm_cvtepi8_epi16:
201; X64:       # BB#0:
202; X64-NEXT:    pmovsxbw %xmm0, %xmm0
203; X64-NEXT:    retq
204  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
205  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
206  %sext = sext <8 x i8> %ext0 to <8 x i16>
207  %res = bitcast <8 x i16> %sext to <2 x i64>
208  ret <2 x i64> %res
209}
210
; Sign-extends the low 4 bytes of the argument to <4 x i32>: the shufflevector
; selects lanes 0-3 of the <16 x i8> view and sext widens them. Both runs are
; expected to select pmovsxbd.
211define <2 x i64> @test_mm_cvtepi8_epi32(<2 x i64> %a0) {
212; X32-LABEL: test_mm_cvtepi8_epi32:
213; X32:       # BB#0:
214; X32-NEXT:    pmovsxbd %xmm0, %xmm0
215; X32-NEXT:    retl
216;
217; X64-LABEL: test_mm_cvtepi8_epi32:
218; X64:       # BB#0:
219; X64-NEXT:    pmovsxbd %xmm0, %xmm0
220; X64-NEXT:    retq
221  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
222  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
223  %sext = sext <4 x i8> %ext0 to <4 x i32>
224  %res = bitcast <4 x i32> %sext to <2 x i64>
225  ret <2 x i64> %res
226}
227
; Sign-extends the low 2 bytes of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <16 x i8> view and sext widens them. Both runs are
; expected to select pmovsxbq.
228define <2 x i64> @test_mm_cvtepi8_epi64(<2 x i64> %a0) {
229; X32-LABEL: test_mm_cvtepi8_epi64:
230; X32:       # BB#0:
231; X32-NEXT:    pmovsxbq %xmm0, %xmm0
232; X32-NEXT:    retl
233;
234; X64-LABEL: test_mm_cvtepi8_epi64:
235; X64:       # BB#0:
236; X64-NEXT:    pmovsxbq %xmm0, %xmm0
237; X64-NEXT:    retq
238  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
239  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
240  %sext = sext <2 x i8> %ext0 to <2 x i64>
241  ret <2 x i64> %sext
242}
243
; Sign-extends the low 4 words of the argument to <4 x i32>: the shufflevector
; selects lanes 0-3 of the <8 x i16> view and sext widens them. Both runs are
; expected to select pmovsxwd.
244define <2 x i64> @test_mm_cvtepi16_epi32(<2 x i64> %a0) {
245; X32-LABEL: test_mm_cvtepi16_epi32:
246; X32:       # BB#0:
247; X32-NEXT:    pmovsxwd %xmm0, %xmm0
248; X32-NEXT:    retl
249;
250; X64-LABEL: test_mm_cvtepi16_epi32:
251; X64:       # BB#0:
252; X64-NEXT:    pmovsxwd %xmm0, %xmm0
253; X64-NEXT:    retq
254  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
255  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
256  %sext = sext <4 x i16> %ext0 to <4 x i32>
257  %res = bitcast <4 x i32> %sext to <2 x i64>
258  ret <2 x i64> %res
259}
260
; Sign-extends the low 2 words of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <8 x i16> view and sext widens them. Both runs are
; expected to select pmovsxwq.
261define <2 x i64> @test_mm_cvtepi16_epi64(<2 x i64> %a0) {
262; X32-LABEL: test_mm_cvtepi16_epi64:
263; X32:       # BB#0:
264; X32-NEXT:    pmovsxwq %xmm0, %xmm0
265; X32-NEXT:    retl
266;
267; X64-LABEL: test_mm_cvtepi16_epi64:
268; X64:       # BB#0:
269; X64-NEXT:    pmovsxwq %xmm0, %xmm0
270; X64-NEXT:    retq
271  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
273  %sext = sext <2 x i16> %ext0 to <2 x i64>
274  ret <2 x i64> %sext
275}
276
; Sign-extends the low 2 dwords of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <4 x i32> view and sext widens them. Both runs are
; expected to select pmovsxdq.
277define <2 x i64> @test_mm_cvtepi32_epi64(<2 x i64> %a0) {
278; X32-LABEL: test_mm_cvtepi32_epi64:
279; X32:       # BB#0:
280; X32-NEXT:    pmovsxdq %xmm0, %xmm0
281; X32-NEXT:    retl
282;
283; X64-LABEL: test_mm_cvtepi32_epi64:
284; X64:       # BB#0:
285; X64-NEXT:    pmovsxdq %xmm0, %xmm0
286; X64-NEXT:    retq
287  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
288  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
289  %sext = sext <2 x i32> %ext0 to <2 x i64>
290  ret <2 x i64> %sext
291}
292
; Zero-extends the low 8 bytes of the argument to <8 x i16>: the shufflevector
; selects lanes 0-7 of the <16 x i8> view and zext widens them. Both runs are
; expected to select pmovzxbw.
293define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
294; X32-LABEL: test_mm_cvtepu8_epi16:
295; X32:       # BB#0:
296; X32-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
297; X32-NEXT:    retl
298;
299; X64-LABEL: test_mm_cvtepu8_epi16:
300; X64:       # BB#0:
301; X64-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
302; X64-NEXT:    retq
303  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
304  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
305  %zext = zext <8 x i8> %ext0 to <8 x i16>
306  %res = bitcast <8 x i16> %zext to <2 x i64>
307  ret <2 x i64> %res
308}
309
; Zero-extends the low 4 bytes of the argument to <4 x i32>: the shufflevector
; selects lanes 0-3 of the <16 x i8> view and zext widens them. Both runs are
; expected to select pmovzxbd.
310define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
311; X32-LABEL: test_mm_cvtepu8_epi32:
312; X32:       # BB#0:
313; X32-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
314; X32-NEXT:    retl
315;
316; X64-LABEL: test_mm_cvtepu8_epi32:
317; X64:       # BB#0:
318; X64-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
319; X64-NEXT:    retq
320  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
321  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
322  %zext = zext <4 x i8> %ext0 to <4 x i32>
323  %res = bitcast <4 x i32> %zext to <2 x i64>
324  ret <2 x i64> %res
325}
326
; Zero-extends the low 2 bytes of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <16 x i8> view and zext widens them. Both runs are
; expected to select pmovzxbq.
327define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
328; X32-LABEL: test_mm_cvtepu8_epi64:
329; X32:       # BB#0:
330; X32-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
331; X32-NEXT:    retl
332;
333; X64-LABEL: test_mm_cvtepu8_epi64:
334; X64:       # BB#0:
335; X64-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
336; X64-NEXT:    retq
337  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
338  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
339  %zext = zext <2 x i8> %ext0 to <2 x i64>
340  ret <2 x i64> %zext
341}
342
; Zero-extends the low 4 words of the argument to <4 x i32>: the shufflevector
; selects lanes 0-3 of the <8 x i16> view and zext widens them. Both runs are
; expected to select pmovzxwd.
343define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
344; X32-LABEL: test_mm_cvtepu16_epi32:
345; X32:       # BB#0:
346; X32-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
347; X32-NEXT:    retl
348;
349; X64-LABEL: test_mm_cvtepu16_epi32:
350; X64:       # BB#0:
351; X64-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
352; X64-NEXT:    retq
353  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
354  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
355  %zext = zext <4 x i16> %ext0 to <4 x i32>
356  %res = bitcast <4 x i32> %zext to <2 x i64>
357  ret <2 x i64> %res
358}
359
; Zero-extends the low 2 words of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <8 x i16> view and zext widens them. Both runs are
; expected to select pmovzxwq.
360define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
361; X32-LABEL: test_mm_cvtepu16_epi64:
362; X32:       # BB#0:
363; X32-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
364; X32-NEXT:    retl
365;
366; X64-LABEL: test_mm_cvtepu16_epi64:
367; X64:       # BB#0:
368; X64-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
369; X64-NEXT:    retq
370  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
371  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
372  %zext = zext <2 x i16> %ext0 to <2 x i64>
373  ret <2 x i64> %zext
374}
375
; Zero-extends the low 2 dwords of the argument to <2 x i64>: the shufflevector
; selects lanes 0-1 of the <4 x i32> view and zext widens them. Both runs are
; expected to select pmovzxdq.
376define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
377; X32-LABEL: test_mm_cvtepu32_epi64:
378; X32:       # BB#0:
379; X32-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
380; X32-NEXT:    retl
381;
382; X64-LABEL: test_mm_cvtepu32_epi64:
383; X64:       # BB#0:
384; X64-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
385; X64-NEXT:    retq
386  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
387  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
388  %zext = zext <2 x i32> %ext0 to <2 x i64>
389  ret <2 x i64> %zext
390}
391
; Calls llvm.x86.sse41.dppd on (%a0, %a1) with immediate 7; both runs are
; expected to select dppd $7.
392define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
393; X32-LABEL: test_mm_dp_pd:
394; X32:       # BB#0:
395; X32-NEXT:    dppd $7, %xmm1, %xmm0
396; X32-NEXT:    retl
397;
398; X64-LABEL: test_mm_dp_pd:
399; X64:       # BB#0:
400; X64-NEXT:    dppd $7, %xmm1, %xmm0
401; X64-NEXT:    retq
402  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
403  ret <2 x double> %res
404}
405declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
406
; Calls llvm.x86.sse41.dpps on (%a0, %a1) with immediate 7; both runs are
; expected to select dpps $7.
407define <4 x float> @test_mm_dp_ps(<4 x float> %a0, <4 x float> %a1) {
408; X32-LABEL: test_mm_dp_ps:
409; X32:       # BB#0:
410; X32-NEXT:    dpps $7, %xmm1, %xmm0
411; X32-NEXT:    retl
412;
413; X64-LABEL: test_mm_dp_ps:
414; X64:       # BB#0:
415; X64-NEXT:    dpps $7, %xmm1, %xmm0
416; X64-NEXT:    retq
417  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
418  ret <4 x float> %res
419}
420declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
421
; Extracts byte lane 1 from the <16 x i8> view of the argument and zero-extends
; it to i32. Both runs are expected to select pextrb $1 followed by a movzbl
; of %al.
422define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
423; X32-LABEL: test_mm_extract_epi8:
424; X32:       # BB#0:
425; X32-NEXT:    pextrb $1, %xmm0, %eax
426; X32-NEXT:    movzbl %al, %eax
427; X32-NEXT:    retl
428;
429; X64-LABEL: test_mm_extract_epi8:
430; X64:       # BB#0:
431; X64-NEXT:    pextrb $1, %xmm0, %eax
432; X64-NEXT:    movzbl %al, %eax
433; X64-NEXT:    retq
434  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
435  %ext = extractelement <16 x i8> %arg0, i32 1
436  %res = zext i8 %ext to i32
437  ret i32 %res
438}
439
; Extracts dword lane 1 from the <4 x i32> view of the argument. Both runs are
; expected to select pextrd $1 into %eax.
440define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
441; X32-LABEL: test_mm_extract_epi32:
442; X32:       # BB#0:
443; X32-NEXT:    pextrd $1, %xmm0, %eax
444; X32-NEXT:    retl
445;
446; X64-LABEL: test_mm_extract_epi32:
447; X64:       # BB#0:
448; X64-NEXT:    pextrd $1, %xmm0, %eax
449; X64-NEXT:    retq
450  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
451  %ext = extractelement <4 x i32> %arg0, i32 1
452  ret i32 %ext
453}
454
; Extracts qword lane 1 directly from the <2 x i64> argument. The i386 run
; expects the value to be returned through two pextrd instructions (dword
; lanes 2 and 3 into %eax and %edx); the x86-64 run expects a single pextrq $1.
455define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
456; X32-LABEL: test_mm_extract_epi64:
457; X32:       # BB#0:
458; X32-NEXT:    pextrd $2, %xmm0, %eax
459; X32-NEXT:    pextrd $3, %xmm0, %edx
460; X32-NEXT:    retl
461;
462; X64-LABEL: test_mm_extract_epi64:
463; X64:       # BB#0:
464; X64-NEXT:    pextrq $1, %xmm0, %rax
465; X64-NEXT:    retq
467  %ext = extractelement <2 x i64> %a0, i32 1
468  ret i64 %ext
469}
470
471; TODO test_mm_extract_ps
472
; Calls llvm.x86.sse41.round.pd with immediate 1; both runs are expected to
; select roundpd $1 operating in place on xmm0.
473define <2 x double> @test_mm_floor_pd(<2 x double> %a0) {
474; X32-LABEL: test_mm_floor_pd:
475; X32:       # BB#0:
476; X32-NEXT:    roundpd $1, %xmm0, %xmm0
477; X32-NEXT:    retl
478;
479; X64-LABEL: test_mm_floor_pd:
480; X64:       # BB#0:
481; X64-NEXT:    roundpd $1, %xmm0, %xmm0
482; X64-NEXT:    retq
483  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 1)
484  ret <2 x double> %res
485}
486
; Calls llvm.x86.sse41.round.ps with immediate 1; both runs are expected to
; select roundps $1 operating in place on xmm0.
487define <4 x float> @test_mm_floor_ps(<4 x float> %a0) {
488; X32-LABEL: test_mm_floor_ps:
489; X32:       # BB#0:
490; X32-NEXT:    roundps $1, %xmm0, %xmm0
491; X32-NEXT:    retl
492;
493; X64-LABEL: test_mm_floor_ps:
494; X64:       # BB#0:
495; X64-NEXT:    roundps $1, %xmm0, %xmm0
496; X64-NEXT:    retq
497  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 1)
498  ret <4 x float> %res
499}
500
; Calls llvm.x86.sse41.round.sd on (%a0, %a1) with immediate 1; both runs are
; expected to select roundsd $1 with xmm1 as source and xmm0 as destination.
501define <2 x double> @test_mm_floor_sd(<2 x double> %a0, <2 x double> %a1) {
502; X32-LABEL: test_mm_floor_sd:
503; X32:       # BB#0:
504; X32-NEXT:    roundsd $1, %xmm1, %xmm0
505; X32-NEXT:    retl
506;
507; X64-LABEL: test_mm_floor_sd:
508; X64:       # BB#0:
509; X64-NEXT:    roundsd $1, %xmm1, %xmm0
510; X64-NEXT:    retq
511  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 1)
512  ret <2 x double> %res
513}
514
; Calls llvm.x86.sse41.round.ss on (%a0, %a1) with immediate 1; both runs are
; expected to select roundss $1 with xmm1 as source and xmm0 as destination.
515define <4 x float> @test_mm_floor_ss(<4 x float> %a0, <4 x float> %a1) {
516; X32-LABEL: test_mm_floor_ss:
517; X32:       # BB#0:
518; X32-NEXT:    roundss $1, %xmm1, %xmm0
519; X32-NEXT:    retl
520;
521; X64-LABEL: test_mm_floor_ss:
522; X64:       # BB#0:
523; X64-NEXT:    roundss $1, %xmm1, %xmm0
524; X64-NEXT:    retq
525  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 1)
526  ret <4 x float> %res
527}
528
; Inserts the i8 argument %a1 into byte lane 1 of the <16 x i8> view of %a0.
; Both runs expect a movzbl of the scalar (loaded from the stack on i386, from
; %dil on x86-64) followed by pinsrb $1.
529define <2 x i64> @test_mm_insert_epi8(<2 x i64> %a0, i8 %a1) {
530; X32-LABEL: test_mm_insert_epi8:
531; X32:       # BB#0:
532; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
533; X32-NEXT:    pinsrb $1, %eax, %xmm0
534; X32-NEXT:    retl
535;
536; X64-LABEL: test_mm_insert_epi8:
537; X64:       # BB#0:
538; X64-NEXT:    movzbl %dil, %eax
539; X64-NEXT:    pinsrb $1, %eax, %xmm0
540; X64-NEXT:    retq
541  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
542  %res = insertelement <16 x i8> %arg0, i8 %a1,i32 1
543  %bc = bitcast <16 x i8> %res to <2 x i64>
544  ret <2 x i64> %bc
545}
546
; Inserts the i32 argument %a1 into dword lane 1 of the <4 x i32> view of %a0.
; Both runs expect a single pinsrd $1, fed from the stack on i386 and from
; %edi on x86-64.
547define <2 x i64> @test_mm_insert_epi32(<2 x i64> %a0, i32 %a1) {
548; X32-LABEL: test_mm_insert_epi32:
549; X32:       # BB#0:
550; X32-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
551; X32-NEXT:    retl
552;
553; X64-LABEL: test_mm_insert_epi32:
554; X64:       # BB#0:
555; X64-NEXT:    pinsrd $1, %edi, %xmm0
556; X64-NEXT:    retq
557  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
558  %res = insertelement <4 x i32> %arg0, i32 %a1,i32 1
559  %bc = bitcast <4 x i32> %res to <2 x i64>
560  ret <2 x i64> %bc
561}
562
; Inserts the i64 argument %a1 into qword lane 1 of %a0. The i386 run expects
; two pinsrd instructions (dword lanes 2 and 3) fed from the stack; the x86-64
; run expects a single pinsrq $1 from %rdi.
563define <2 x i64> @test_mm_insert_epi64(<2 x i64> %a0, i64 %a1) {
564; X32-LABEL: test_mm_insert_epi64:
565; X32:       # BB#0:
566; X32-NEXT:    pinsrd $2, {{[0-9]+}}(%esp), %xmm0
567; X32-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
568; X32-NEXT:    retl
569;
570; X64-LABEL: test_mm_insert_epi64:
571; X64:       # BB#0:
572; X64-NEXT:    pinsrq $1, %rdi, %xmm0
573; X64-NEXT:    retq
574  %res = insertelement <2 x i64> %a0, i64 %a1,i32 1
575  ret <2 x i64> %res
576}
577
; Calls llvm.x86.sse41.insertps on (%a0, %a1) with immediate 4. The decoded
; shuffle comment in the checks shows the expected result layout:
; xmm1[0], xmm0[1], zero, xmm0[3].
578define <4 x float> @test_mm_insert_ps(<4 x float> %a0, <4 x float> %a1) {
579; X32-LABEL: test_mm_insert_ps:
580; X32:       # BB#0:
581; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
582; X32-NEXT:    retl
583;
584; X64-LABEL: test_mm_insert_ps:
585; X64:       # BB#0:
586; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
587; X64-NEXT:    retq
588  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4)
589  ret <4 x float> %res
590}
591declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
592
; Signed byte maximum written as generic IR: icmp sgt on the <16 x i8> views
; followed by a select between the two operands. Both runs are expected to
; select pmaxsb.
593define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) {
594; X32-LABEL: test_mm_max_epi8:
595; X32:       # BB#0:
596; X32-NEXT:    pmaxsb %xmm1, %xmm0
597; X32-NEXT:    retl
598;
599; X64-LABEL: test_mm_max_epi8:
600; X64:       # BB#0:
601; X64-NEXT:    pmaxsb %xmm1, %xmm0
602; X64-NEXT:    retq
603  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
604  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
605  %cmp = icmp sgt <16 x i8> %arg0, %arg1
606  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
607  %bc = bitcast <16 x i8> %sel to <2 x i64>
608  ret <2 x i64> %bc
609}
610
; Signed dword maximum written as generic IR: icmp sgt on the <4 x i32> views
; followed by a select between the two operands. Both runs are expected to
; select pmaxsd.
611define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
612; X32-LABEL: test_mm_max_epi32:
613; X32:       # BB#0:
614; X32-NEXT:    pmaxsd %xmm1, %xmm0
615; X32-NEXT:    retl
616;
617; X64-LABEL: test_mm_max_epi32:
618; X64:       # BB#0:
619; X64-NEXT:    pmaxsd %xmm1, %xmm0
620; X64-NEXT:    retq
621  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
622  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
623  %cmp = icmp sgt <4 x i32> %arg0, %arg1
624  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
625  %bc = bitcast <4 x i32> %sel to <2 x i64>
626  ret <2 x i64> %bc
627}
628
; Unsigned word maximum written as generic IR: icmp ugt on the <8 x i16> views
; followed by a select between the two operands. Both runs are expected to
; select pmaxuw.
629define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
630; X32-LABEL: test_mm_max_epu16:
631; X32:       # BB#0:
632; X32-NEXT:    pmaxuw %xmm1, %xmm0
633; X32-NEXT:    retl
634;
635; X64-LABEL: test_mm_max_epu16:
636; X64:       # BB#0:
637; X64-NEXT:    pmaxuw %xmm1, %xmm0
638; X64-NEXT:    retq
639  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
640  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
641  %cmp = icmp ugt <8 x i16> %arg0, %arg1
642  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
643  %bc = bitcast <8 x i16> %sel to <2 x i64>
644  ret <2 x i64> %bc
645}
646
; Unsigned dword maximum written as generic IR: icmp ugt on the <4 x i32> views
; followed by a select between the two operands. Both runs are expected to
; select pmaxud.
647define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
648; X32-LABEL: test_mm_max_epu32:
649; X32:       # BB#0:
650; X32-NEXT:    pmaxud %xmm1, %xmm0
651; X32-NEXT:    retl
652;
653; X64-LABEL: test_mm_max_epu32:
654; X64:       # BB#0:
655; X64-NEXT:    pmaxud %xmm1, %xmm0
656; X64-NEXT:    retq
657  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
658  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
659  %cmp = icmp ugt <4 x i32> %arg0, %arg1
660  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
661  %bc = bitcast <4 x i32> %sel to <2 x i64>
662  ret <2 x i64> %bc
663}
664
; Signed byte minimum written as generic IR: icmp slt on the <16 x i8> views
; followed by a select between the two operands. Both runs are expected to
; select pminsb.
665define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
666; X32-LABEL: test_mm_min_epi8:
667; X32:       # BB#0:
668; X32-NEXT:    pminsb %xmm1, %xmm0
669; X32-NEXT:    retl
670;
671; X64-LABEL: test_mm_min_epi8:
672; X64:       # BB#0:
673; X64-NEXT:    pminsb %xmm1, %xmm0
674; X64-NEXT:    retq
675  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
676  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
677  %cmp = icmp slt <16 x i8> %arg0, %arg1
678  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
679  %bc = bitcast <16 x i8> %sel to <2 x i64>
680  ret <2 x i64> %bc
681}
682
; Signed dword minimum written as generic IR: icmp slt on the <4 x i32> views
; followed by a select between the two operands. Both runs are expected to
; select pminsd.
683define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
684; X32-LABEL: test_mm_min_epi32:
685; X32:       # BB#0:
686; X32-NEXT:    pminsd %xmm1, %xmm0
687; X32-NEXT:    retl
688;
689; X64-LABEL: test_mm_min_epi32:
690; X64:       # BB#0:
691; X64-NEXT:    pminsd %xmm1, %xmm0
692; X64-NEXT:    retq
693  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
694  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
695  %cmp = icmp slt <4 x i32> %arg0, %arg1
696  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
697  %bc = bitcast <4 x i32> %sel to <2 x i64>
698  ret <2 x i64> %bc
699}
700
; Unsigned word minimum written as generic IR: icmp ult on the <8 x i16> views
; followed by a select between the two operands. Both runs are expected to
; select pminuw.
701define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
702; X32-LABEL: test_mm_min_epu16:
703; X32:       # BB#0:
704; X32-NEXT:    pminuw %xmm1, %xmm0
705; X32-NEXT:    retl
706;
707; X64-LABEL: test_mm_min_epu16:
708; X64:       # BB#0:
709; X64-NEXT:    pminuw %xmm1, %xmm0
710; X64-NEXT:    retq
711  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
712  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
713  %cmp = icmp ult <8 x i16> %arg0, %arg1
714  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
715  %bc = bitcast <8 x i16> %sel to <2 x i64>
716  ret <2 x i64> %bc
717}
718
; Unsigned dword minimum written as generic IR: icmp ult on the <4 x i32> views
; followed by a select between the two operands. Both runs are expected to
; select pminud.
719define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
720; X32-LABEL: test_mm_min_epu32:
721; X32:       # BB#0:
722; X32-NEXT:    pminud %xmm1, %xmm0
723; X32-NEXT:    retl
724;
725; X64-LABEL: test_mm_min_epu32:
726; X64:       # BB#0:
727; X64-NEXT:    pminud %xmm1, %xmm0
728; X64-NEXT:    retq
729  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
730  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
731  %cmp = icmp ult <4 x i32> %arg0, %arg1
732  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
733  %bc = bitcast <4 x i32> %sel to <2 x i64>
734  ret <2 x i64> %bc
735}
736
; Calls llvm.x86.sse41.phminposuw on the <8 x i16> view of the argument; both
; runs are expected to select phminposuw operating in place on xmm0.
737define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
738; X32-LABEL: test_mm_minpos_epu16:
739; X32:       # BB#0:
740; X32-NEXT:    phminposuw %xmm0, %xmm0
741; X32-NEXT:    retl
742;
743; X64-LABEL: test_mm_minpos_epu16:
744; X64:       # BB#0:
745; X64-NEXT:    phminposuw %xmm0, %xmm0
746; X64-NEXT:    retq
747  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
748  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %arg0)
749  %bc = bitcast <8 x i16> %res to <2 x i64>
750  ret <2 x i64> %bc
751}
752declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
753
; Calls llvm.x86.sse41.mpsadbw on the <16 x i8> views of the arguments with
; immediate 1; both runs are expected to select mpsadbw $1.
754define <2 x i64> @test_mm_mpsadbw_epu8(<2 x i64> %a0, <2 x i64> %a1) {
755; X32-LABEL: test_mm_mpsadbw_epu8:
756; X32:       # BB#0:
757; X32-NEXT:    mpsadbw $1, %xmm1, %xmm0
758; X32-NEXT:    retl
759;
760; X64-LABEL: test_mm_mpsadbw_epu8:
761; X64:       # BB#0:
762; X64-NEXT:    mpsadbw $1, %xmm1, %xmm0
763; X64-NEXT:    retq
764  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
765  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
766  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %arg0, <16 x i8> %arg1, i8 1)
767  %bc = bitcast <8 x i16> %res to <2 x i64>
768  ret <2 x i64> %bc
769}
770declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
771
; Calls llvm.x86.sse41.pmuldq on the <4 x i32> views of the arguments, which
; returns <2 x i64>; both runs are expected to select pmuldq.
772define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
773; X32-LABEL: test_mm_mul_epi32:
774; X32:       # BB#0:
775; X32-NEXT:    pmuldq %xmm1, %xmm0
776; X32-NEXT:    retl
777;
778; X64-LABEL: test_mm_mul_epi32:
779; X64:       # BB#0:
780; X64-NEXT:    pmuldq %xmm1, %xmm0
781; X64-NEXT:    retq
782  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
783  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
784  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %arg0, <4 x i32> %arg1)
785  ret <2 x i64> %res
786}
787declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
788
; Lane-wise dword multiply written as a generic IR mul on the <4 x i32> views
; of the arguments; both runs are expected to select pmulld.
789define <2 x i64> @test_mm_mullo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
790; X32-LABEL: test_mm_mullo_epi32:
791; X32:       # BB#0:
792; X32-NEXT:    pmulld %xmm1, %xmm0
793; X32-NEXT:    retl
794;
795; X64-LABEL: test_mm_mullo_epi32:
796; X64:       # BB#0:
797; X64-NEXT:    pmulld %xmm1, %xmm0
798; X64-NEXT:    retq
799  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
800  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
801  %res = mul <4 x i32> %arg0, %arg1
802  %bc = bitcast <4 x i32> %res to <2 x i64>
803  ret <2 x i64> %bc
804}
805
; Calls llvm.x86.sse41.packusdw on the <4 x i32> views of the arguments, which
; returns <8 x i16>; both runs are expected to select packusdw.
806define <2 x i64> @test_mm_packus_epi32(<2 x i64> %a0, <2 x i64> %a1) {
807; X32-LABEL: test_mm_packus_epi32:
808; X32:       # BB#0:
809; X32-NEXT:    packusdw %xmm1, %xmm0
810; X32-NEXT:    retl
811;
812; X64-LABEL: test_mm_packus_epi32:
813; X64:       # BB#0:
814; X64-NEXT:    packusdw %xmm1, %xmm0
815; X64-NEXT:    retq
816  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
817  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
818  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %arg0, <4 x i32> %arg1)
819  %bc = bitcast <8 x i16> %res to <2 x i64>
820  ret <2 x i64> %bc
821}
822declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
823
; Calls llvm.x86.sse41.round.pd with immediate 4; both runs are expected to
; select roundpd $4 operating in place on xmm0.
824define <2 x double> @test_mm_round_pd(<2 x double> %a0) {
825; X32-LABEL: test_mm_round_pd:
826; X32:       # BB#0:
827; X32-NEXT:    roundpd $4, %xmm0, %xmm0
828; X32-NEXT:    retl
829;
830; X64-LABEL: test_mm_round_pd:
831; X64:       # BB#0:
832; X64-NEXT:    roundpd $4, %xmm0, %xmm0
833; X64-NEXT:    retq
834  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 4)
835  ret <2 x double> %res
836}
837
; Calls llvm.x86.sse41.round.ps with immediate 4; both runs are expected to
; select roundps $4 operating in place on xmm0.
838define <4 x float> @test_mm_round_ps(<4 x float> %a0) {
839; X32-LABEL: test_mm_round_ps:
840; X32:       # BB#0:
841; X32-NEXT:    roundps $4, %xmm0, %xmm0
842; X32-NEXT:    retl
843;
844; X64-LABEL: test_mm_round_ps:
845; X64:       # BB#0:
846; X64-NEXT:    roundps $4, %xmm0, %xmm0
847; X64-NEXT:    retq
848  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 4)
849  ret <4 x float> %res
850}
851
; Calls llvm.x86.sse41.round.sd on (%a0, %a1) with immediate 4; both runs are
; expected to select roundsd $4 with xmm1 as source and xmm0 as destination.
852define <2 x double> @test_mm_round_sd(<2 x double> %a0, <2 x double> %a1) {
853; X32-LABEL: test_mm_round_sd:
854; X32:       # BB#0:
855; X32-NEXT:    roundsd $4, %xmm1, %xmm0
856; X32-NEXT:    retl
857;
858; X64-LABEL: test_mm_round_sd:
859; X64:       # BB#0:
860; X64-NEXT:    roundsd $4, %xmm1, %xmm0
861; X64-NEXT:    retq
862  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 4)
863  ret <2 x double> %res
864}
865
; Calls llvm.x86.sse41.round.ss on (%a0, %a1) with immediate 4; both runs are
; expected to select roundss $4 with xmm1 as source and xmm0 as destination.
866define <4 x float> @test_mm_round_ss(<4 x float> %a0, <4 x float> %a1) {
867; X32-LABEL: test_mm_round_ss:
868; X32:       # BB#0:
869; X32-NEXT:    roundss $4, %xmm1, %xmm0
870; X32-NEXT:    retl
871;
872; X64-LABEL: test_mm_round_ss:
873; X64:       # BB#0:
874; X64-NEXT:    roundss $4, %xmm1, %xmm0
875; X64-NEXT:    retq
876  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 4)
877  ret <4 x float> %res
878}
879
; Calls llvm.x86.sse41.movntdqa on the pointer argument after a bitcast to i8*.
; The i386 run expects the pointer to be loaded from the stack into %eax
; first; both runs then expect a single movntdqa load into xmm0.
880define <2 x i64> @test_mm_stream_load_si128(<2 x i64>* %a0) {
881; X32-LABEL: test_mm_stream_load_si128:
882; X32:       # BB#0:
883; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
884; X32-NEXT:    movntdqa (%eax), %xmm0
885; X32-NEXT:    retl
886;
887; X64-LABEL: test_mm_stream_load_si128:
888; X64:       # BB#0:
889; X64-NEXT:    movntdqa (%rdi), %xmm0
890; X64-NEXT:    retq
891  %arg0 = bitcast <2 x i64>* %a0 to i8*
892  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0)
893  ret <2 x i64> %res
894}
; The intrinsic loads through its pointer argument, so it is declared readonly
; rather than readnone.
895declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
896
; Calls llvm.x86.sse41.ptestc with an all-ones <2 x i64> constant as the second
; operand. The checks expect pcmpeqd on xmm1 ahead of the ptest (presumably to
; materialise that constant), then an sbbl/andl $1 sequence producing the i32
; result in %eax.
897define i32 @test_mm_test_all_ones(<2 x i64> %a0) {
898; X32-LABEL: test_mm_test_all_ones:
899; X32:       # BB#0:
900; X32-NEXT:    pcmpeqd %xmm1, %xmm1
901; X32-NEXT:    ptest %xmm1, %xmm0
902; X32-NEXT:    sbbl %eax, %eax
903; X32-NEXT:    andl $1, %eax
904; X32-NEXT:    retl
905;
906; X64-LABEL: test_mm_test_all_ones:
907; X64:       # BB#0:
908; X64-NEXT:    pcmpeqd %xmm1, %xmm1
909; X64-NEXT:    ptest %xmm1, %xmm0
910; X64-NEXT:    sbbl %eax, %eax
911; X64-NEXT:    andl $1, %eax
912; X64-NEXT:    retq
913  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>)
914  ret i32 %res
915}
916declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
917
; Calls llvm.x86.sse41.ptestz on (%a0, %a1). Both runs expect %eax to be
; cleared with xorl, then ptest, then sete %al to produce the i32 result.
918define i32 @test_mm_test_all_zeros(<2 x i64> %a0, <2 x i64> %a1) {
919; X32-LABEL: test_mm_test_all_zeros:
920; X32:       # BB#0:
921; X32-NEXT:    xorl %eax, %eax
922; X32-NEXT:    ptest %xmm1, %xmm0
923; X32-NEXT:    sete %al
924; X32-NEXT:    retl
925;
926; X64-LABEL: test_mm_test_all_zeros:
927; X64:       # BB#0:
928; X64-NEXT:    xorl %eax, %eax
929; X64-NEXT:    ptest %xmm1, %xmm0
930; X64-NEXT:    sete %al
931; X64-NEXT:    retq
932  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
933  ret i32 %res
934}
935declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
936
; Calls llvm.x86.sse41.ptestnzc on (%a0, %a1). Both runs expect %eax to be
; cleared with xorl, then ptest, then seta %al to produce the i32 result.
937define i32 @test_mm_test_mix_ones_zeros(<2 x i64> %a0, <2 x i64> %a1) {
938; X32-LABEL: test_mm_test_mix_ones_zeros:
939; X32:       # BB#0:
940; X32-NEXT:    xorl %eax, %eax
941; X32-NEXT:    ptest %xmm1, %xmm0
942; X32-NEXT:    seta %al
943; X32-NEXT:    retl
944;
945; X64-LABEL: test_mm_test_mix_ones_zeros:
946; X64:       # BB#0:
947; X64-NEXT:    xorl %eax, %eax
948; X64-NEXT:    ptest %xmm1, %xmm0
949; X64-NEXT:    seta %al
950; X64-NEXT:    retq
951  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
952  ret i32 %res
953}
954declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
955
; Calls llvm.x86.sse41.ptestc on (%a0, %a1). Both runs expect ptest followed by
; an sbbl/andl $1 sequence producing the i32 result in %eax.
956define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) {
957; X32-LABEL: test_mm_testc_si128:
958; X32:       # BB#0:
959; X32-NEXT:    ptest %xmm1, %xmm0
960; X32-NEXT:    sbbl %eax, %eax
961; X32-NEXT:    andl $1, %eax
962; X32-NEXT:    retl
963;
964; X64-LABEL: test_mm_testc_si128:
965; X64:       # BB#0:
966; X64-NEXT:    ptest %xmm1, %xmm0
967; X64-NEXT:    sbbl %eax, %eax
968; X64-NEXT:    andl $1, %eax
969; X64-NEXT:    retq
970  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
971  ret i32 %res
972}
973
; Calls llvm.x86.sse41.ptestnzc on (%a0, %a1). Both runs expect %eax to be
; cleared with xorl, then ptest, then seta %al to produce the i32 result.
974define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) {
975; X32-LABEL: test_mm_testnzc_si128:
976; X32:       # BB#0:
977; X32-NEXT:    xorl %eax, %eax
978; X32-NEXT:    ptest %xmm1, %xmm0
979; X32-NEXT:    seta %al
980; X32-NEXT:    retl
981;
982; X64-LABEL: test_mm_testnzc_si128:
983; X64:       # BB#0:
984; X64-NEXT:    xorl %eax, %eax
985; X64-NEXT:    ptest %xmm1, %xmm0
986; X64-NEXT:    seta %al
987; X64-NEXT:    retq
988  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
989  ret i32 %res
990}
991
; Calls llvm.x86.sse41.ptestz on (%a0, %a1). Both runs expect %eax to be
; cleared with xorl, then ptest, then sete %al to produce the i32 result.
992define i32 @test_mm_testz_si128(<2 x i64> %a0, <2 x i64> %a1) {
993; X32-LABEL: test_mm_testz_si128:
994; X32:       # BB#0:
995; X32-NEXT:    xorl %eax, %eax
996; X32-NEXT:    ptest %xmm1, %xmm0
997; X32-NEXT:    sete %al
998; X32-NEXT:    retl
999;
1000; X64-LABEL: test_mm_testz_si128:
1001; X64:       # BB#0:
1002; X64-NEXT:    xorl %eax, %eax
1003; X64-NEXT:    ptest %xmm1, %xmm0
1004; X64-NEXT:    sete %al
1005; X64-NEXT:    retq
1006  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
1007  ret i32 %res
1008}
1009