; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
; If both operands of an unsigned icmp are known non-negative, then
; we don't need to flip the sign bits in order to map to signed pcmpgt*.
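;
; For example (an illustrative sketch, not an autogenerated check): in
; ugt_v4i32 below, both operands are logically shifted right by 1, so
; their sign bits are known zero and
;   %cmp = icmp ugt <4 x i32> %sh1, %sh2
; lowers to a single 'pcmpgtd'. Compare the SSE v2i64 lowerings, which
; still bias each operand into signed range with 'pxor' against the
; sign-bit constant [2147483648,2147483648] before comparing.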

define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ugt_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ugt <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ult_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ult <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: uge_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uge_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp uge <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ule_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ule_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ule <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ugt_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ugt <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ult_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ult <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: uge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp uge <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: ule_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ule <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ugt_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ugt <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ult_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ult <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: uge_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp uge <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: ule_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ule <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ugt <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ult <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: uge_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pmaxub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uge_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp uge <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ule_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ule_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ule <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <8 x i16> @PR47448_uge(i16 signext %0) {
; SSE2-LABEL: PR47448_uge:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andl $7, %edi
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR47448_uge:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andl $7, %edi
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: PR47448_uge:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmaxuw {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_uge:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpmaxuw {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp uge <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}

define <8 x i16> @PR47448_ugt(i16 signext %0) {
; SSE-LABEL: PR47448_ugt:
; SSE:       # %bb.0:
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR47448_ugt:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_ugt:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp ugt <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}