; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

define i8 @cttz_i8(i8 %x)  {
; X86-LABEL: cttz_i8:
; X86:       # %bb.0:
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8:
; X64:       # %bb.0:
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

define i16 @cttz_i16(i16 %x)  {
; X86-LABEL: cttz_i16:
; X86:       # %bb.0:
; X86-NEXT:    bsfw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsfw %di, %ax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

define i32 @cttz_i32(i32 %x)  {
; X86-LABEL: cttz_i32:
; X86:       # %bb.0:
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

define i64 @cttz_i64(i64 %x)  {
; X86-NOCMOV-LABEL: cttz_i64:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB3_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB3_1:
; X86-NOCMOV-NEXT:    bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    bsfl %ecx, %edx
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    addl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB3_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB3_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

define i8 @ctlz_i8(i8 %x) {
; X86-LABEL: ctlz_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $7, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i8:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    addl $-24, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

define i16 @ctlz_i16(i16 %x) {
; X86-LABEL: ctlz_i16:
; X86:       # %bb.0:
; X86-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    xorl $15, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i16:
; X64:       # %bb.0:
; X64-NEXT:    bsrw %di, %ax
; X64-NEXT:    xorl $15, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i16:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

define i32 @ctlz_i32(i32 %x) {
; X86-LABEL: ctlz_i32:
; X86:       # %bb.0:
; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i32:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i32:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  ret i32 %tmp
}

define i64 @ctlz_i64(i64 %x) {
; X86-NOCMOV-LABEL: ctlz_i64:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB7_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB7_1:
; X86-NOCMOV-NEXT:    bsrl %eax, %eax
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: ctlz_i64:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    bsrl %ecx, %edx
; X86-CMOV-NEXT:    xorl $31, %edx
; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    xorl $31, %eax
; X86-CMOV-NEXT:    addl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: ctlz_i64:
; X64:       # %bb.0:
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i64:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB7_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB7_1:
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
  ret i64 %tmp
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @ctlz_i8_zero_test(i8 %n) {
; X86-LABEL: ctlz_i8_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    testb %al, %al
; X86-NEXT:    je .LBB8_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $7, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB8_1:
; X86-NEXT:    movb $8, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    je .LBB8_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB8_1:
; X64-NEXT:    movb $8, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i8_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    addl $-24, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @ctlz_i16_zero_test(i16 %n) {
; X86-LABEL: ctlz_i16_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testw %ax, %ax
; X86-NEXT:    je .LBB9_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsrw %ax, %ax
; X86-NEXT:    xorl $15, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB9_1:
; X86-NEXT:    movw $16, %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testw %di, %di
; X64-NEXT:    je .LBB9_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrw %di, %ax
; X64-NEXT:    xorl $15, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB9_1:
; X64-NEXT:    movw $16, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i16_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @ctlz_i32_zero_test(i32 %n) {
; X86-LABEL: ctlz_i32_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    je .LBB10_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB10_1:
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB10_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB10_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i32_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @ctlz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT:    movl $63, %eax
; X86-NOCMOV-NEXT:    je .LBB11_2
; X86-NOCMOV-NEXT:  # %bb.1:
; X86-NOCMOV-NEXT:    movl %edx, %eax
; X86-NOCMOV-NEXT:  .LBB11_2:
; X86-NOCMOV-NEXT:    testl %ecx, %ecx
; X86-NOCMOV-NEXT:    jne .LBB11_3
; X86-NOCMOV-NEXT:  # %bb.4:
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB11_3:
; X86-NOCMOV-NEXT:    bsrl %ecx, %eax
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    movl $63, %edx
; X86-CMOV-NEXT:    cmovnel %eax, %edx
; X86-CMOV-NEXT:    xorl $31, %edx
; X86-CMOV-NEXT:    addl $32, %edx
; X86-CMOV-NEXT:    bsrl %ecx, %eax
; X86-CMOV-NEXT:    xorl $31, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB11_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB11_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB11_1:
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @cttz_i8_zero_test(i8 %n) {
; X86-LABEL: cttz_i8_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    testb %al, %al
; X86-NEXT:    je .LBB12_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB12_1:
; X86-NEXT:    movb $8, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    je .LBB12_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB12_1:
; X64-NEXT:    movb $8, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl $256, %eax # imm = 0x100
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $256, %edi # imm = 0x100
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @cttz_i16_zero_test(i16 %n) {
; X86-LABEL: cttz_i16_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testw %ax, %ax
; X86-NEXT:    je .LBB13_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsfw %ax, %ax
; X86-NEXT:    retl
; X86-NEXT:  .LBB13_1:
; X86-NEXT:    movw $16, %ax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testw %di, %di
; X64-NEXT:    je .LBB13_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfw %di, %ax
; X64-NEXT:    retq
; X64-NEXT:  .LBB13_1:
; X64-NEXT:    movw $16, %ax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntw %di, %ax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @cttz_i32_zero_test(i32 %n) {
; X86-LABEL: cttz_i32_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    je .LBB14_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsfl %eax, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB14_1:
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB14_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB14_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @cttz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    je .LBB15_2
; X86-NOCMOV-NEXT:  # %bb.1:
; X86-NOCMOV-NEXT:    movl %edx, %eax
; X86-NOCMOV-NEXT:  .LBB15_2:
; X86-NOCMOV-NEXT:    testl %ecx, %ecx
; X86-NOCMOV-NEXT:    jne .LBB15_3
; X86-NOCMOV-NEXT:  # %bb.4:
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB15_3:
; X86-NOCMOV-NEXT:    bsfl %ecx, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %edx
; X86-CMOV-NEXT:    cmovnel %ecx, %edx
; X86-CMOV-NEXT:    addl $32, %edx
; X86-CMOV-NEXT:    bsfl %eax, %eax
; X86-CMOV-NEXT:    cmovel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    je .LBB15_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rdi, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB15_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB15_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB15_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to delete the movl and je.
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X86-LABEL: ctlz_i32_fold_cmov:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    je .LBB16_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB16_1:
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    je .LBB16_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB16_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    orl $1, %eax
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $1, %edi
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
  %or = or i32 %n, 1
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
  ret i32 %tmp1
}

; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.
; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
define i32 @ctlz_bsr(i32 %n) {
; X86-LABEL: ctlz_bsr:
; X86:       # %bb.0:
; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_bsr:
; X64:       # %bb.0:
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_bsr:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl $31, %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
;        codegen doesn't know how to combine the $32 and $31 into $63.
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X86-LABEL: ctlz_bsr_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    je .LBB18_1
; X86-NEXT:  # %bb.2: # %cond.false
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB18_1:
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    xorl $31, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    je .LBB18_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrl %edi, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB18_1:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    xorl $31, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_bsr_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl $31, %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    lzcntl %edi, %eax
; X64-CLZ-NEXT:    xorl $31, %eax
; X64-CLZ-NEXT:    retq
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

define i8 @cttz_i8_knownbits(i8 %x)  {
; X86-LABEL: cttz_i8_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    orb $2, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_knownbits:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-CLZ-NEXT:    orb $2, %al
; X86-CLZ-NEXT:    movzbl %al, %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $2, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

define i8 @ctlz_i8_knownbits(i8 %x)  {
; X86-LABEL: ctlz_i8_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    orb $64, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    bsrl %eax, %eax
; X86-NEXT:    xorl $7, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $64, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    bsrl %eax, %eax
; X64-NEXT:    xorl $7, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i8_knownbits:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-CLZ-NEXT:    orb $64, %al
; X86-CLZ-NEXT:    movzbl %al, %eax
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    addl $-24, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $64, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    lzcntl %eax, %eax
; X64-CLZ-NEXT:    addl $-24, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq

  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSR
; This is relevant for 32-bit mode without lzcnt
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB21_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    orl $1, %eax
; X86-NOCMOV-NEXT:    bsrl %eax, %eax
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    orl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB21_1:
; X86-NOCMOV-NEXT:    bsrl %eax, %eax
; X86-NOCMOV-NEXT:    xorl $31, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    orl $1, %eax
; X86-CMOV-NEXT:    bsrl %ecx, %edx
; X86-CMOV-NEXT:    xorl $31, %edx
; X86-CMOV-NEXT:    bsrl %eax, %eax
; X86-CMOV-NEXT:    xorl $31, %eax
; X86-CMOV-NEXT:    orl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    orq $1, %rdi
; X64-NEXT:    je .LBB21_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsrq %rdi, %rax
; X64-NEXT:    xorq $63, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB21_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB21_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    orl $1, %eax
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    orl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB21_1:
; X86-CLZ-NEXT:    lzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orq $1, %rdi
; X64-CLZ-NEXT:    lzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSF
; This is relevant for 32-bit mode without tzcnt
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB22_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-NOCMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    bsfl %eax, %eax
; X86-NOCMOV-NEXT:    orl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB22_1:
; X86-NOCMOV-NEXT:    bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    bsfl %ecx, %edx
; X86-CMOV-NEXT:    bsfl %eax, %eax
; X86-CMOV-NEXT:    orl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    je .LBB22_1
; X64-NEXT:  # %bb.2: # %cond.false
; X64-NEXT:    bsfq %rax, %rax
; X64-NEXT:    retq
; X64-NEXT:  .LBB22_1:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB22_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    orl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB22_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT:    orq %rdi, %rax
; X64-CLZ-NEXT:    tzcntq %rax, %rax
; X64-CLZ-NEXT:    retq
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}
