; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

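; Truncate <8 x i64> to <8 x i32>: AVX-512 uses a single vpmovqd, while the
; SSE/AVX targets lower to shuffle and blend sequences.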
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

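; Truncate <8 x i64> to <8 x i16>: AVX-512 uses vpmovqw; SSE2/SSSE3 go through
; scalar pextrw/movd round trips, SSE4.1/AVX1 blend with zero and use packusdw.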
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

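; Truncate <8 x i64> to <8 x i8> and store the result: AVX-512 uses the vpmovqb
; store form; SSE/AVX1 mask each lane with 255 and packuswb.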
define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

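; Truncate <8 x i32> to <8 x i16>: SSE2 shifts and packssdw, SSSE3/SSE4.1/AVX1
; use pshufb, and AVX-512 uses vpmovdw.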
define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

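; Truncate <8 x i32> to <8 x i8> and store the result.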
define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movq %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

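; Truncate <16 x i32> to <16 x i8> and store the result: AVX-512 uses the
; vpmovdb store form.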
define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i32_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

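; Truncate two <4 x i64> vectors to <4 x i32> and concatenate them into <8 x i32>.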
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

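; Truncate two <4 x i64> vectors to <4 x i16> and concatenate them into <8 x i16>.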
define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pextrw $4, %xmm0, %eax
; SSE41-NEXT:    pinsrw $1, %eax, %xmm0
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    pinsrw $2, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm1, %eax
; SSE41-NEXT:    pinsrw $3, %eax, %xmm0
; SSE41-NEXT:    movd %xmm2, %eax
; SSE41-NEXT:    pinsrw $4, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm2, %eax
; SSE41-NEXT:    pinsrw $5, %eax, %xmm0
; SSE41-NEXT:    movd %xmm3, %eax
; SSE41-NEXT:    pinsrw $6, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm3, %eax
; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

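; Truncate two <2 x i64> vectors to <2 x i32> and concatenate them into <4 x i32>.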
define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

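; Truncate <2 x i64> to <2 x i32> and bitcast the result to i64.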
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

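; Truncate two <4 x i32> vectors to <4 x i16> and concatenate them into <8 x i16>.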
define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

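; Truncate two <8 x i16> vectors to <8 x i8> and concatenate them into <16 x i8>.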
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

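; Truncation of a zeroinitializer constant should fold to an all-zeros vector.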
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}