; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
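
; These tests check that signed division of integer vectors by constants is
; lowered to shift sequences where possible, rather than scalarized idiv.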
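; sdiv by a splat power-of-two constant (32 = 2^5) should lower to a shift
; sequence: materialize the sign mask (psraw $15), logical-shift it to form
; the rounding bias (psrlw $11 = 16-5), add it in, then arithmetic-shift by 5.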
define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

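; The minsize attribute should not change the lowering: division by a splat
; power of two still expands to the same shift sequence.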
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

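; sdiv by a zero divisor is not folded away; codegen scalarizes it into four
; idivl instructions against a zeroed register.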
define <4 x i32> @sdiv_zero(<4 x i32> %var) {
; SSE-LABEL: sdiv_zero:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pextrd $1, %xmm0, %eax
; SSE-NEXT:    xorl %esi, %esi
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
; SSE-NEXT:    pextrd $2, %xmm0, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    pinsrd $2, %eax, %xmm1
; SSE-NEXT:    pextrd $3, %xmm0, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    pinsrd $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_zero:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpextrd $1, %xmm0, %eax
; AVX-NEXT:    xorl %esi, %esi
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $3, %xmm0, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %0
}

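; The same power-of-two expansion for <4 x i32> by 16 = 2^4, using doubleword
; shifts (psrad $31, psrld $28 = 32-4, paddd, psrad $4).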
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %0
}

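; Division by a negative power of two (-16) uses the shift sequence for 16
; followed by a vector negate (subtract from zero).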
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_negative:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %0
}

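; 256-bit <8 x i32> by 64 = 2^6: SSE and AVX1 process the vector in two
; 128-bit halves (AVX1 via vextractf128/vinsertf128), while AVX2 operates on
; the full ymm register.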
define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm0, %xmm2
; SSE-NEXT:    psrad $6, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrld $26, %xmm3
; SSE-NEXT:    paddd %xmm1, %xmm3
; SSE-NEXT:    psrad $6, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %0
}

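; 256-bit <16 x i16> by 4 = 2^2: again split into 128-bit halves for SSE and
; AVX1, and a single ymm shift sequence for AVX2.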
define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm0, %xmm2
; SSE-NEXT:    psraw $2, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psraw $15, %xmm3
; SSE-NEXT:    psrlw $14, %xmm3
; SSE-NEXT:    paddw %xmm1, %xmm3
; SSE-NEXT:    psraw $2, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

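; Non-splat divisor <2, 0, 0, 0>: only the first lane has a usable
; power-of-two divisor, so it is expanded to scalar shifts (shrl $31, addl,
; sarl), while the zero-divisor lanes are scalarized to idivl.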
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE:       # BB#0:
; SSE-NEXT:    pextrd $1, %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    movd %xmm0, %edx
; SSE-NEXT:    movl %edx, %esi
; SSE-NEXT:    shrl $31, %esi
; SSE-NEXT:    addl %edx, %esi
; SSE-NEXT:    sarl %esi
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    pinsrd $1, %eax, %xmm1
; SSE-NEXT:    pextrd $2, %xmm0, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $2, %eax, %xmm1
; SSE-NEXT:    pextrd $3, %xmm0, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $3, %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrd $1, %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vmovd %xmm0, %edx
; AVX-NEXT:    movl %edx, %esi
; AVX-NEXT:    shrl $31, %esi
; AVX-NEXT:    addl %edx, %esi
; AVX-NEXT:    sarl %esi
; AVX-NEXT:    vmovd %esi, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $3, %xmm0, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}