; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR

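; ANDN computes ~src1 & src2, so the 'xor X, -1' + 'and' patterns below select it directly.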
define i32 @andn32(i32 %x, i32 %y)   {
; X86-LABEL: andn32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, i32* %y)   {
; X86-LABEL: andn32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32_load:
; X64:       # %bb.0:
; X64-NEXT:    andnl (%rsi), %edi, %eax
; X64-NEXT:    retq
  %y1 = load i32, i32* %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y)   {
; X86-LABEL: andn64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: andn64:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    retq
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

; Recognize a disguised andn in the following 4 tests.
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp1:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp2:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp3:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp4:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against constant is ok for an 'andn' too
; even though the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $43, %eax
; X86-NEXT:    cmpl $43, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const:
; X64:       # %bb.0:
; X64-NEXT:    andl $43, %edi
; X64-NEXT:    cmpl $43, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power-of-two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    btl %ecx, %eax
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64:       # %bb.0:
; X64-NEXT:    btl %esi, %edi
; X64-NEXT:    setae %al
; X64-NEXT:    retq
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $37, %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl $37, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $37, %edi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl $37, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setg %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn1:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setg %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setbe %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn2:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setbe %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_i8:
; X64:       # %bb.0:
; X64-NEXT:    notb %sil
; X64-NEXT:    testb %sil, %dil
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

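; The bextr control operand encodes the start bit in bits 7:0 and the field length in bits 15:8.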
define i32 @bextr32(i32 %x, i32 %y)   {
; X86-LABEL: bextr32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(i32* %x, i32 %y)   {
; X86-LABEL: bextr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bextrl %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_load:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

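; With fast-bextr the shift+mask is folded into bextr with control 0xC04 (start 4, length 12),
; i.e. the same (x >> 4) & 0xFFF.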
define i32 @bextr32b(i32 %x)  uwtable  ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b:
; X86-SLOW-BEXTR:       # %bb.0:
; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT:    retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b:
; X64-SLOW-BEXTR:       # %bb.0:
; X64-SLOW-BEXTR-NEXT:    movl %edi, %eax
; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT:    retq
;
; X86-FAST-BEXTR-LABEL: bextr32b:
; X86-FAST-BEXTR:       # %bb.0:
; X86-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X86-FAST-BEXTR-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT:    retl
;
; X64-FAST-BEXTR-LABEL: bextr32b:
; X64-FAST-BEXTR:       # %bb.0:
; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT:    bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT:    retq
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use the AH subreg trick to extract bits 15:8.
define i32 @bextr32_subreg(i32 %x)  uwtable  ssp {
; X86-LABEL: bextr32_subreg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_subreg:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    retq
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(i32* %x)  uwtable  ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b_load:
; X86-SLOW-BEXTR:       # %bb.0:
; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT:    movl (%eax), %eax
; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT:    retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b_load:
; X64-SLOW-BEXTR:       # %bb.0:
; X64-SLOW-BEXTR-NEXT:    movl (%rdi), %eax
; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT:    retq
;
; X86-FAST-BEXTR-LABEL: bextr32b_load:
; X86-FAST-BEXTR:       # %bb.0:
; X86-FAST-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT:    movl $3076, %ecx # imm = 0xC04
; X86-FAST-BEXTR-NEXT:    bextrl %ecx, (%eax), %eax
; X86-FAST-BEXTR-NEXT:    retl
;
; X64-FAST-BEXTR-LABEL: bextr32b_load:
; X64-FAST-BEXTR:       # %bb.0:
; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT:    bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT:    retq
  %1 = load i32, i32* %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32c:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

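; 111 is not a mask of contiguous low bits, so no bextr can be formed here.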
define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $111, %eax
; X86-NEXT:    retl
;
; X64-LABEL: non_bextr32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $2, %eax
; X64-NEXT:    andl $111, %eax
; X64-NEXT:    retq
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

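; BLSI isolates the lowest set bit: dest = src & -src.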
define i32 @blsi32(i32 %x)   {
; X86-LABEL: blsi32:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(i32* %x)   {
; X86-LABEL: blsi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsil (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsil (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

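; The _z/_z2 tests check that the flags set by the BMI instruction feed the cmov directly, without an extra test.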
define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsi32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB24_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB24_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsil %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

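; In 32-bit mode only the 32-bit BMI forms exist, so the i64 patterns are expanded with neg/sbb (or add/adc) pairs on X86.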
define i64 @blsi64(i64 %x)   {
; X86-LABEL: blsi64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsi64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB27_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB27_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    negl %esi
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsiq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

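; BLSMSK produces a mask up to and including the lowest set bit: dest = src ^ (src - 1).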
define i32 @blsmsk32(i32 %x)   {
; X86-LABEL: blsmsk32:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(i32* %x)   {
; X86-LABEL: blsmsk32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsmskl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsmsk32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB31_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB31_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsmskl %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsmsk64(i64 %x)   {
; X86-LABEL: blsmsk64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsmsk64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB34_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:  .LBB34_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    xorl %eax, %edx
; X86-NEXT:    xorl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsmskq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

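; BLSR clears the lowest set bit: dest = src & (src - 1).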
define i32 @blsr32(i32 %x)   {
; X86-LABEL: blsr32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_load(i32* %x)   {
; X86-LABEL: blsr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsrl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsrl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsr32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB38_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB38_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsrl %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsr64(i64 %x)   {
; X86-LABEL: blsr64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsr64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB41_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:  .LBB41_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsrq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_constant:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, which hid the blsr pattern.
; We now use the knowledge that the shift leaves the upper bits zero, so the and result
; also has zeros in its upper bits, and shrink the and as well.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    blsrl %eax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64:       # %bb.0:
; X64-NEXT:    shrq $48, %rdi
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}

; FIXME: We should not be using the S flag from BEXTR.
define void @pr40060(i32, i32) {
; X86-LABEL: pr40060:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    js .LBB45_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    jmp bar # TAILCALL
; X86-NEXT:  .LBB45_1:
; X86-NEXT:    retl
;
; X64-LABEL: pr40060:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    js .LBB45_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    jmp bar # TAILCALL
; X64-NEXT:  .LBB45_1:
; X64-NEXT:    retq
  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %6

  tail call void @bar()
  br label %6

  ret void
}

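; The _branch tests check that the ZF from blsr/blsi feeds the branch directly and the result stays live across the call.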
define i32 @blsr32_branch(i32 %x) {
; X86-LABEL: blsr32_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    jne .LBB46_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB46_2:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsrl %edi, %ebx
; X64-NEXT:    jne .LBB46_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB46_2:
; X64-NEXT:    movl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsr64_branch(i64 %x) {
; X86-LABEL: blsr64_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    addl $-1, %esi
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    adcl $-1, %edi
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    andl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    orl %edi, %eax
; X86-NEXT:    jne .LBB47_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB47_2:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsrq %rdi, %rbx
; X64-NEXT:    jne .LBB47_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB47_2:
; X64-NEXT:    movq %rbx, %rax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

define i32 @blsi32_branch(i32 %x) {
; X86-LABEL: blsi32_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %esi
; X86-NEXT:    jne .LBB48_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB48_2:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsil %edi, %ebx
; X64-NEXT:    jne .LBB48_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB48_2:
; X64-NEXT:    movl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsi64_branch(i64 %x) {
; X86-LABEL: blsi64_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    sbbl %ecx, %esi
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    andl %eax, %edi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    orl %esi, %eax
; X86-NEXT:    jne .LBB49_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB49_2:
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsiq %rdi, %rbx
; X64-NEXT:    jne .LBB49_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB49_2:
; X64-NEXT:    movq %rbx, %rax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

declare dso_local void @bar()

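; (-x & x) == x is equivalent to (x & (x - 1)) == 0, so these fold to blsr plus a ZF check.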
define void @pr42118_i32(i32 %x) {
; X86-LABEL: pr42118_i32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB50_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    jmp bar # TAILCALL
; X86-NEXT:  .LBB50_1:
; X86-NEXT:    retl
;
; X64-LABEL: pr42118_i32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    jne .LBB50_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    jmp bar # TAILCALL
; X64-NEXT:  .LBB50_1:
; X64-NEXT:    retq
  %tmp = sub i32 0, %x
  %tmp1 = and i32 %tmp, %x
  %cmp = icmp eq i32 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

define void @pr42118_i64(i64 %x) {
; X86-LABEL: pr42118_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    jne .LBB51_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    jmp bar # TAILCALL
; X86-NEXT:  .LBB51_1:
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: pr42118_i64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    jne .LBB51_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    jmp bar # TAILCALL
; X64-NEXT:  .LBB51_1:
; X64-NEXT:    retq
  %tmp = sub i64 0, %x
  %tmp1 = and i64 %tmp, %x
  %cmp = icmp eq i64 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

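; BLSI sets CF when the source is nonzero, so the x == 0 select can use cmovae without a separate test.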
define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    jne .LBB52_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB52_1:
; X86-NEXT:    blsil %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi_cflag_32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    cmovael %esi, %eax
; X64-NEXT:    retq
  %tobool = icmp eq i32 %x, 0
  %sub = sub nsw i32 0, %x
  %and = and i32 %sub, %x
  %cond = select i1 %tobool, i32 %y, i32 %and
  ret i32 %cond
}

define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    orl %esi, %edi
; X86-NEXT:    jne .LBB53_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jmp .LBB53_3
; X86-NEXT:  .LBB53_1:
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:  .LBB53_3:
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: blsi_cflag_64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    cmovaeq %rsi, %rax
; X64-NEXT:    retq
  %tobool = icmp eq i64 %x, 0
  %sub = sub nsw i64 0, %x
  %and = and i64 %sub, %x
  %cond = select i1 %tobool, i64 %y, i64 %and
  ret i64 %cond
}