; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

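; Shift amount 7 fits in the truncated i32 lanes, so the shift is narrowed to a
; 32-bit pslld after a shuffle packs the low dwords of each i64 element.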
define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps (%rsi), %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; SSE2-NEXT:    pslld $7, %xmm0
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps (%rsi), %xmm0
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX2-NEXT:    retq
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
  %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
  %trunc = trunc <4 x i64> %shl to <4 x i32>
  store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
  ret void
}

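; Shift amount 15 is one less than the i16 result width, so the shift cannot be
; folded away; it is performed as a 16-bit psllw after the truncation.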
define <8 x i16> @trunc_shl_15_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_15_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    psllw $15, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_15_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

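; Shift amount 16 equals the i16 result width, so every truncated lane is known zero.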
define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_16_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_16_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

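; Shift amount 17 exceeds the i16 result width, so the result folds to zero as well.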
define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
; SSE2-LABEL: trunc_shl_17_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %conv = trunc <8 x i32> %shl to <8 x i16>
  ret <8 x i16> %conv
}

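; Scalar case: shift amount 31 is below the i32 width, so the load, shift and
; store are all narrowed to 32 bits.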
define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_31_i32_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl (%rsi), %eax
; SSE2-NEXT:    shll $31, %eax
; SSE2-NEXT:    movl %eax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_31_i32_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl (%rsi), %eax
; AVX2-NEXT:    shll $31, %eax
; AVX2-NEXT:    movl %eax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 31
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

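; Shift amount 32 equals the i32 width, so only a zero store remains.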
define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_32_i32_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_32_i32_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 32
  %trunc = trunc i64 %shl to i32
  store i32 %trunc, i32* %out
  ret void
}

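; Shift amount 15 is below the i16 width; the shift is done in a 32-bit
; register and only the low word is stored.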
define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_15_i16_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl (%rsi), %eax
; SSE2-NEXT:    shll $15, %eax
; SSE2-NEXT:    movw %ax, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_15_i16_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl (%rsi), %eax
; AVX2-NEXT:    shll $15, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 15
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

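; Shift amount 16 equals the i16 width, so only a zero store remains.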
define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_16_i16_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movw $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_16_i16_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movw $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 16
  %trunc = trunc i64 %shl to i16
  store i16 %trunc, i16* %out
  ret void
}

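; Shift amount 7 is below the i8 width, so an 8-bit load/shift/store suffices.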
define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movb (%rsi), %al
; SSE2-NEXT:    shlb $7, %al
; SSE2-NEXT:    movb %al, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movb (%rsi), %al
; AVX2-NEXT:    shlb $7, %al
; AVX2-NEXT:    movb %al, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 7
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}

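; Shift amount 8 equals the i8 width, so only a zero store remains.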
define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
; SSE2-LABEL: trunc_shl_8_i8_i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movb $0, (%rdi)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: trunc_shl_8_i8_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movb $0, (%rdi)
; AVX2-NEXT:    retq
  %val = load i64, i64* %in
  %shl = shl i64 %val, 8
  %trunc = trunc i64 %shl to i8
  store i8 %trunc, i8* %out
  ret void
}
