1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
5
; Signed division of every i16 lane by the splat constant 32 (2^5).
; The expected code contains no divide instruction: the per-lane sign mask
; (arithmetic shift by 15) is shifted right logically by 11, giving a bias of
; 31 on negative lanes and 0 elsewhere; the bias is added to the input and an
; arithmetic shift by 5 produces the quotient.
define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %quot = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %quot
}
28
; Same <8 x i16> division by a splat of 32 as the plain variant, but the
; function carries the minsize attribute. The expected output is still the
; shift/add/shift sequence (shift counts 15, 11 and 5) with no divide.
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm0, %xmm1
; SSE-NEXT:    psraw $5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %quot = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %quot
}
51
; Signed division of every i32 lane by the splat constant 16 (2^4).
; Expected lowering: sign mask via arithmetic shift by 31, logical shift by 28
; to form a bias of 15 on negative lanes, add, then arithmetic shift by 4.
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %quot = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %quot
}
74
; Signed division of every i32 lane by the splat constant -16.
; The expected code first computes the quotient for +16 (shift counts 31, 28
; and 4) and then negates it by subtracting the result from a zeroed register.
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_negative:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %quot = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %quot
}
100
; Signed division of a 256-bit <8 x i32> vector by the splat constant 64 (2^6).
; Every configuration expects the shift/add/shift sequence (counts 31, 26, 6):
;  - the SSE4.1 run applies it to the two xmm halves separately,
;  - the AVX1 run applies it to the low half and to the extracted high half,
;    then re-inserts the high half,
;  - the AVX2 run applies it once to the whole ymm register.
define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm0, %xmm2
; SSE-NEXT:    psrad $6, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrld $26, %xmm3
; SSE-NEXT:    paddd %xmm1, %xmm3
; SSE-NEXT:    psrad $6, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %quot = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %quot
}
143
; Signed division of a 256-bit <16 x i16> vector by the splat constant 4 (2^2).
; Every configuration expects the shift/add/shift sequence (counts 15, 14, 2):
;  - the SSE4.1 run applies it to the two xmm halves separately,
;  - the AVX1 run applies it to the low half and to the extracted high half,
;    then re-inserts the high half,
;  - the AVX2 run applies it once to the whole ymm register.
define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm0, %xmm2
; SSE-NEXT:    psraw $2, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psraw $15, %xmm3
; SSE-NEXT:    psrlw $14, %xmm3
; SSE-NEXT:    paddw %xmm1, %xmm3
; SSE-NEXT:    psraw $2, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %quot = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4,
                                 i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %quot
}
186
; Division by zero in any lane is undefined behaviour. The divisor below is
; not a splat and has zero in lanes 1-3, so the whole operation is undefined;
; the expected output for every configuration is a bare return.

define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %quot = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %quot
}
200