1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c
6
; test_mm_abs_epi8: reinterprets the <2 x i64> argument as <16 x i8>, passes it
; to the llvm.x86.ssse3.pabs.b.128 intrinsic and reinterprets the result back
; to <2 x i64>. The i386 and x86_64 check blocks both expect a single pabsb on
; xmm0 followed by the return instruction.
7define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
8; X32-LABEL: test_mm_abs_epi8:
9; X32:       # BB#0:
10; X32-NEXT:    pabsb %xmm0, %xmm0
11; X32-NEXT:    retl
12;
13; X64-LABEL: test_mm_abs_epi8:
14; X64:       # BB#0:
15; X64-NEXT:    pabsb %xmm0, %xmm0
16; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
17  %arg = bitcast <2 x i64> %a0 to <16 x i8>
18  %call = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %arg)
19  %res = bitcast <16 x i8> %call to <2 x i64>
20  ret <2 x i64> %res
21}
; Declaration of the intrinsic called by test_mm_abs_epi8.
22declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
23
; test_mm_abs_epi16: same shape as the byte variant, but the argument is viewed
; as <8 x i16> and handed to llvm.x86.ssse3.pabs.w.128. Both check blocks
; expect a single pabsw on xmm0 followed by the return instruction.
24define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
25; X32-LABEL: test_mm_abs_epi16:
26; X32:       # BB#0:
27; X32-NEXT:    pabsw %xmm0, %xmm0
28; X32-NEXT:    retl
29;
30; X64-LABEL: test_mm_abs_epi16:
31; X64:       # BB#0:
32; X64-NEXT:    pabsw %xmm0, %xmm0
33; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
34  %arg = bitcast <2 x i64> %a0 to <8 x i16>
35  %call = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %arg)
36  %res = bitcast <8 x i16> %call to <2 x i64>
37  ret <2 x i64> %res
38}
; Declaration of the intrinsic called by test_mm_abs_epi16.
39declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
40
; test_mm_abs_epi32: views the <2 x i64> argument as <4 x i32>, calls
; llvm.x86.ssse3.pabs.d.128 and casts the result back to <2 x i64>. Both check
; blocks expect a single pabsd on xmm0 followed by the return instruction.
41define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
42; X32-LABEL: test_mm_abs_epi32:
43; X32:       # BB#0:
44; X32-NEXT:    pabsd %xmm0, %xmm0
45; X32-NEXT:    retl
46;
47; X64-LABEL: test_mm_abs_epi32:
48; X64:       # BB#0:
49; X64-NEXT:    pabsd %xmm0, %xmm0
50; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
51  %arg = bitcast <2 x i64> %a0 to <4 x i32>
52  %call = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %arg)
53  %res = bitcast <4 x i32> %call to <2 x i64>
54  ret <2 x i64> %res
55}
; Declaration of the intrinsic called by test_mm_abs_epi32.
56declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
57
; test_mm_alignr_epi8: expressed with a generic shufflevector instead of an
; intrinsic call, so no declaration follows this function. The mask picks
; elements 2..17 of the two <16 x i8> operands, i.e. bytes 2-15 of %arg0
; followed by bytes 0-1 of %arg1. Both check blocks expect this to be selected
; as one palignr that writes xmm1, followed by a movdqa copying xmm1 into xmm0.
58define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
59; X32-LABEL: test_mm_alignr_epi8:
60; X32:       # BB#0:
61; X32-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
62; X32-NEXT:    movdqa %xmm1, %xmm0
63; X32-NEXT:    retl
64;
65; X64-LABEL: test_mm_alignr_epi8:
66; X64:       # BB#0:
67; X64-NEXT:    palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
68; X64-NEXT:    movdqa %xmm1, %xmm0
69; X64-NEXT:    retq
; IR under test; mask indices 16 and 17 refer to elements 0 and 1 of %arg1.
70  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
71  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
72  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
73  %res = bitcast <16 x i8> %shuf to <2 x i64>
74  ret <2 x i64> %res
75}
76
; test2_mm_alignr_epi8: second byte-align case, again written as a plain
; shufflevector. Here the mask picks elements 1..16, i.e. bytes 1-15 of %arg0
; followed by byte 0 of %arg1. Both check blocks expect one palignr that
; writes xmm1, followed by a movdqa copying xmm1 into xmm0.
77define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
78; X32-LABEL: test2_mm_alignr_epi8:
79; X32:       # BB#0:
80; X32-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
81; X32-NEXT:    movdqa %xmm1, %xmm0
82; X32-NEXT:    retl
83;
84; X64-LABEL: test2_mm_alignr_epi8:
85; X64:       # BB#0:
86; X64-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
87; X64-NEXT:    movdqa %xmm1, %xmm0
88; X64-NEXT:    retq
; IR under test; mask index 16 refers to element 0 of %arg1.
89  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
90  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
91  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
92  %res = bitcast <16 x i8> %shuf to <2 x i64>
93  ret <2 x i64> %res
94}
95
; test_mm_hadd_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.phadd.w.128 on them and casts the result back to <2 x i64>.
; Both check blocks expect a single phaddw with xmm1 as source and xmm0 as
; destination, followed by the return instruction.
96define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
97; X32-LABEL: test_mm_hadd_epi16:
98; X32:       # BB#0:
99; X32-NEXT:    phaddw %xmm1, %xmm0
100; X32-NEXT:    retl
101;
102; X64-LABEL: test_mm_hadd_epi16:
103; X64:       # BB#0:
104; X64-NEXT:    phaddw %xmm1, %xmm0
105; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
106  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
107  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
108  %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
109  %res = bitcast <8 x i16> %call to <2 x i64>
110  ret <2 x i64> %res
111}
; Declaration of the intrinsic called by test_mm_hadd_epi16.
112declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
113
; test_mm_hadd_epi32: views both <2 x i64> arguments as <4 x i32>, calls
; llvm.x86.ssse3.phadd.d.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single phaddd (xmm1 into xmm0) and then the return.
114define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
115; X32-LABEL: test_mm_hadd_epi32:
116; X32:       # BB#0:
117; X32-NEXT:    phaddd %xmm1, %xmm0
118; X32-NEXT:    retl
119;
120; X64-LABEL: test_mm_hadd_epi32:
121; X64:       # BB#0:
122; X64-NEXT:    phaddd %xmm1, %xmm0
123; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
124  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
125  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
126  %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
127  %res = bitcast <4 x i32> %call to <2 x i64>
128  ret <2 x i64> %res
129}
; Declaration of the intrinsic called by test_mm_hadd_epi32.
130declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
131
; test_mm_hadds_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.phadd.sw.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single phaddsw (xmm1 into xmm0) and then the return.
132define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
133; X32-LABEL: test_mm_hadds_epi16:
134; X32:       # BB#0:
135; X32-NEXT:    phaddsw %xmm1, %xmm0
136; X32-NEXT:    retl
137;
138; X64-LABEL: test_mm_hadds_epi16:
139; X64:       # BB#0:
140; X64-NEXT:    phaddsw %xmm1, %xmm0
141; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
142  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
143  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
144  %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
145  %res = bitcast <8 x i16> %call to <2 x i64>
146  ret <2 x i64> %res
147}
; Declaration of the intrinsic called by test_mm_hadds_epi16.
148declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
149
; test_mm_hsub_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.phsub.w.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single phsubw (xmm1 into xmm0) and then the return.
150define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
151; X32-LABEL: test_mm_hsub_epi16:
152; X32:       # BB#0:
153; X32-NEXT:    phsubw %xmm1, %xmm0
154; X32-NEXT:    retl
155;
156; X64-LABEL: test_mm_hsub_epi16:
157; X64:       # BB#0:
158; X64-NEXT:    phsubw %xmm1, %xmm0
159; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
160  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
161  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
162  %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
163  %res = bitcast <8 x i16> %call to <2 x i64>
164  ret <2 x i64> %res
165}
; Declaration of the intrinsic called by test_mm_hsub_epi16.
166declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
167
; test_mm_hsub_epi32: views both <2 x i64> arguments as <4 x i32>, calls
; llvm.x86.ssse3.phsub.d.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single phsubd (xmm1 into xmm0) and then the return.
168define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
169; X32-LABEL: test_mm_hsub_epi32:
170; X32:       # BB#0:
171; X32-NEXT:    phsubd %xmm1, %xmm0
172; X32-NEXT:    retl
173;
174; X64-LABEL: test_mm_hsub_epi32:
175; X64:       # BB#0:
176; X64-NEXT:    phsubd %xmm1, %xmm0
177; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
178  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
179  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
180  %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
181  %res = bitcast <4 x i32> %call to <2 x i64>
182  ret <2 x i64> %res
183}
; Declaration of the intrinsic called by test_mm_hsub_epi32.
184declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
185
; test_mm_hsubs_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.phsub.sw.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single phsubsw (xmm1 into xmm0) and then the return.
186define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
187; X32-LABEL: test_mm_hsubs_epi16:
188; X32:       # BB#0:
189; X32-NEXT:    phsubsw %xmm1, %xmm0
190; X32-NEXT:    retl
191;
192; X64-LABEL: test_mm_hsubs_epi16:
193; X64:       # BB#0:
194; X64-NEXT:    phsubsw %xmm1, %xmm0
195; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
196  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
197  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
198  %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
199  %res = bitcast <8 x i16> %call to <2 x i64>
200  ret <2 x i64> %res
201}
; Declaration of the intrinsic called by test_mm_hsubs_epi16.
202declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
203
; test_mm_maddubs_epi16: views both <2 x i64> arguments as <16 x i8> and calls
; llvm.x86.ssse3.pmadd.ub.sw.128. Unlike most tests in this file the intrinsic's
; result type (<8 x i16>) differs from its operand type (<16 x i8>). Both check
; blocks expect a single pmaddubsw (xmm1 into xmm0) and then the return.
204define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
205; X32-LABEL: test_mm_maddubs_epi16:
206; X32:       # BB#0:
207; X32-NEXT:    pmaddubsw %xmm1, %xmm0
208; X32-NEXT:    retl
209;
210; X64-LABEL: test_mm_maddubs_epi16:
211; X64:       # BB#0:
212; X64-NEXT:    pmaddubsw %xmm1, %xmm0
213; X64-NEXT:    retq
; IR under test; operands are <16 x i8>, the call result is <8 x i16>.
214  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
215  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
216  %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
217  %res = bitcast <8 x i16> %call to <2 x i64>
218  ret <2 x i64> %res
219}
; Declaration of the intrinsic called by test_mm_maddubs_epi16.
220declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
221
; test_mm_mulhrs_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.pmul.hr.sw.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single pmulhrsw (xmm1 into xmm0) and then the return.
222define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
223; X32-LABEL: test_mm_mulhrs_epi16:
224; X32:       # BB#0:
225; X32-NEXT:    pmulhrsw %xmm1, %xmm0
226; X32-NEXT:    retl
227;
228; X64-LABEL: test_mm_mulhrs_epi16:
229; X64:       # BB#0:
230; X64-NEXT:    pmulhrsw %xmm1, %xmm0
231; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
232  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
233  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
234  %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
235  %res = bitcast <8 x i16> %call to <2 x i64>
236  ret <2 x i64> %res
237}
; Declaration of the intrinsic called by test_mm_mulhrs_epi16.
238declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
239
; test_mm_shuffle_epi8: views both <2 x i64> arguments as <16 x i8>, calls
; llvm.x86.ssse3.pshuf.b.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single pshufb (xmm1 into xmm0) and then the return.
240define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
241; X32-LABEL: test_mm_shuffle_epi8:
242; X32:       # BB#0:
243; X32-NEXT:    pshufb %xmm1, %xmm0
244; X32-NEXT:    retl
245;
246; X64-LABEL: test_mm_shuffle_epi8:
247; X64:       # BB#0:
248; X64-NEXT:    pshufb %xmm1, %xmm0
249; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
250  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
251  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
252  %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
253  %res = bitcast <16 x i8> %call to <2 x i64>
254  ret <2 x i64> %res
255}
; Declaration of the intrinsic called by test_mm_shuffle_epi8.
256declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
257
; test_mm_sign_epi8: views both <2 x i64> arguments as <16 x i8>, calls
; llvm.x86.ssse3.psign.b.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single psignb (xmm1 into xmm0) and then the return.
258define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
259; X32-LABEL: test_mm_sign_epi8:
260; X32:       # BB#0:
261; X32-NEXT:    psignb %xmm1, %xmm0
262; X32-NEXT:    retl
263;
264; X64-LABEL: test_mm_sign_epi8:
265; X64:       # BB#0:
266; X64-NEXT:    psignb %xmm1, %xmm0
267; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
268  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
269  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
270  %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
271  %res = bitcast <16 x i8> %call to <2 x i64>
272  ret <2 x i64> %res
273}
; Declaration of the intrinsic called by test_mm_sign_epi8.
274declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
275
; test_mm_sign_epi16: views both <2 x i64> arguments as <8 x i16>, calls
; llvm.x86.ssse3.psign.w.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single psignw (xmm1 into xmm0) and then the return.
276define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
277; X32-LABEL: test_mm_sign_epi16:
278; X32:       # BB#0:
279; X32-NEXT:    psignw %xmm1, %xmm0
280; X32-NEXT:    retl
281;
282; X64-LABEL: test_mm_sign_epi16:
283; X64:       # BB#0:
284; X64-NEXT:    psignw %xmm1, %xmm0
285; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
286  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
287  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
288  %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
289  %res = bitcast <8 x i16> %call to <2 x i64>
290  ret <2 x i64> %res
291}
; Declaration of the intrinsic called by test_mm_sign_epi16.
292declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
293
; test_mm_sign_epi32: views both <2 x i64> arguments as <4 x i32>, calls
; llvm.x86.ssse3.psign.d.128 and casts the result back to <2 x i64>. Both
; check blocks expect a single psignd (xmm1 into xmm0) and then the return.
294define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
295; X32-LABEL: test_mm_sign_epi32:
296; X32:       # BB#0:
297; X32-NEXT:    psignd %xmm1, %xmm0
298; X32-NEXT:    retl
299;
300; X64-LABEL: test_mm_sign_epi32:
301; X64:       # BB#0:
302; X64-NEXT:    psignd %xmm1, %xmm0
303; X64-NEXT:    retq
; IR under test; the check lines above describe the expected llc output.
304  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
305  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
306  %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
307  %res = bitcast <4 x i32> %call to <2 x i64>
308  ret <2 x i64> %res
309}
; Declaration of the intrinsic called by test_mm_sign_epi32.
310declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
311