1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5
define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
; Truncate two <2 x i64> vectors to <2 x i32> and concatenate the halves.
; Expected lowering packs each source's even dwords, then combines them
; (punpcklqdq pre-SSE4.1, pblendw once SSE4.1 blends are available).
; SSE2-LABEL: trunc2x2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX-NEXT:    retq


entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
42
define i64 @trunc2i64(<2 x i64> %inval) {
; Truncate <2 x i64> to <2 x i32>, then bitcast the pair to a scalar i64.
; Should lower to a single dword-packing shuffle plus one XMM-to-GPR move.
; SSE-LABEL: trunc2i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq


entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}
62
define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; Truncate two <4 x i32> vectors to <4 x i16> and concatenate the halves.
; SSE2 needs a pshuflw/pshufhw/pshufd chain per source; SSSE3 and later
; can use a single shared pshufb mask for both inputs before the unpack.
; SSE2-LABEL: trunc2x4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq




entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
108
109; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32(<4 x i32> %inval) {
; Truncate <4 x i32> to <4 x i16>, then bitcast to a scalar i64 (PR15524).
; Verifies the pack stays in a vector register instead of scalarizing.
; SSE2-LABEL: trunc4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq




entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}
145
define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; Truncate two <8 x i16> vectors to <8 x i8> and concatenate the halves.
; SSE2 masks the low bytes and packs with packuswb; SSSE3 and later use a
; shared pshufb byte-gather mask (high lanes undef) plus an unpack.
; SSE2-LABEL: trunc2x8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq




entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
188
189; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16(<8 x i16> %inval) {
; Truncate <8 x i16> to <8 x i8>, then bitcast to a scalar i64 (PR15524).
; Verifies the byte pack happens in-register rather than via scalarization.
; SSE2-LABEL: trunc8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq




entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
224
define <16 x i8> @trunc16i64_const() {
; Truncating a zero constant and shuffling it (with one undef lane) must
; constant-fold to an all-zero vector, i.e. a single register-zeroing idiom.
; Note: the LABEL patterns include the trailing ':' so they anchor on the
; label definition itself and cannot match a mere mention of the symbol.
; SSE-LABEL: trunc16i64_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}
241