; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse4.1 | FileCheck %s

; This test works just like the non-upgrade one except that it only checks
; forms which require auto-upgrading.

; Calls llvm.x86.sse41.blendpd through the i32-immediate signature declared
; below. With immediate 6 the expected code is a single blendpd that keeps
; element 0 of xmm0 and takes element 1 from xmm1.
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone


; Calls llvm.x86.sse41.blendps through the i32-immediate signature declared
; below. With immediate 7 the expected code is a single blendps that takes
; elements 0-2 from xmm1 and keeps element 3 of xmm0.
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


; Calls llvm.x86.sse41.dppd through the i32-immediate signature declared
; below. The expected code is a single dppd carrying the same immediate (7).
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_dppd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    dppd $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


; Calls llvm.x86.sse41.dpps through the i32-immediate signature declared
; below. The expected code is a single dpps carrying the same immediate (7).
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_dpps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    dpps $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


; Calls llvm.x86.sse41.insertps through the i32-immediate signature declared
; below. With immediate 17 the expected code is a single insertps whose result
; is: element 0 zeroed, element 1 taken from xmm1[0], elements 2-3 kept from
; xmm0.
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_insertps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone


; Calls llvm.x86.sse41.mpsadbw through the i32-immediate signature declared
; below. The expected code is a single mpsadbw carrying the same immediate (7).
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_mpsadbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    mpsadbw $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


; Calls llvm.x86.sse41.pblendw through the i32-immediate signature declared
; below. With immediate 7 the expected code is a single pblendw that takes
; elements 0-2 from xmm1 and keeps elements 3-7 of xmm0.
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxbd intrinsic (<16 x i8> -> <4 x i32>).
; The expected code is a single register-to-register pmovsxbd.
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxbd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxbq intrinsic (<16 x i8> -> <2 x i64>).
; The expected code is a single register-to-register pmovsxbq.
define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxbw intrinsic (<16 x i8> -> <8 x i16>).
; The expected code is a single register-to-register pmovsxbw.
define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxbw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxdq intrinsic (<4 x i32> -> <2 x i64>).
; The expected code is a single register-to-register pmovsxdq.
define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxdq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxwd intrinsic (<8 x i16> -> <4 x i32>).
; The expected code is a single register-to-register pmovsxwd.
define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxwd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


; Calls the llvm.x86.sse41.pmovsxwq intrinsic (<8 x i16> -> <2 x i64>).
; The expected code is a single register-to-register pmovsxwq.
define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovsxwq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxbd intrinsic (<16 x i8> -> <4 x i32>).
; The expected code is a single pmovzxbd; its shuffle comment shows bytes 0-3
; of xmm0, each followed by three zero bytes.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxbq intrinsic (<16 x i8> -> <2 x i64>).
; The expected code is a single pmovzxbq; its shuffle comment shows bytes 0-1
; of xmm0, each followed by seven zero bytes.
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxbw intrinsic (<16 x i8> -> <8 x i16>).
; The expected code is a single pmovzxbw; its shuffle comment shows bytes 0-7
; of xmm0, each followed by one zero byte.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxdq intrinsic (<4 x i32> -> <2 x i64>).
; The expected code is a single pmovzxdq; its shuffle comment shows elements
; 0-1 of xmm0, each followed by one zero element.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxwd intrinsic (<8 x i16> -> <4 x i32>).
; The expected code is a single pmovzxwd; its shuffle comment shows elements
; 0-3 of xmm0, each followed by one zero element.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


; Calls the llvm.x86.sse41.pmovzxwq intrinsic (<8 x i16> -> <2 x i64>).
; The expected code is a single pmovzxwq; its shuffle comment shows elements
; 0-1 of xmm0, each followed by three zero elements.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

; Calls the llvm.x86.sse41.pmaxsb intrinsic on two <16 x i8> operands.
; The expected code is a single pmaxsb.
define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: max_epi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmaxsb %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone

; Calls the llvm.x86.sse41.pminsb intrinsic on two <16 x i8> operands.
; The expected code is a single pminsb.
define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: min_epi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pminsb %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

; Calls the llvm.x86.sse41.pmaxuw intrinsic on two <8 x i16> operands.
; The expected code is a single pmaxuw.
define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: max_epu16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmaxuw %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

; Calls the llvm.x86.sse41.pminuw intrinsic on two <8 x i16> operands.
; The expected code is a single pminuw.
define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: min_epu16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pminuw %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

; Calls the llvm.x86.sse41.pmaxsd intrinsic on two <4 x i32> operands.
; The expected code is a single pmaxsd.
define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: max_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmaxsd %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

; Calls the llvm.x86.sse41.pminsd intrinsic on two <4 x i32> operands.
; The expected code is a single pminsd.
define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: min_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pminsd %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; Calls the llvm.x86.sse41.pmaxud intrinsic on two <4 x i32> operands.
; The expected code is a single pmaxud.
define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: max_epu32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pmaxud %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

; Calls the llvm.x86.sse41.pminud intrinsic on two <4 x i32> operands.
; The expected code is a single pminud.
define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: min_epu32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    pminud %xmm1, %xmm0
; CHECK-NEXT:    retl
;
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
