1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
6
7;
8; AMD intrinsics
9;
10
; 16-bit trailing-zero count with an explicit zero-input guard.
; The IR zero-extends %a0 to i32, tests it against 0, calls llvm.cttz.i16
; with its i1 flag set to true, and selects the cttz result for a non-zero
; input or the constant 16 otherwise.
; Recorded output: the i686 run branches around tzcntw (jne), while the
; x86_64 run issues tzcntw unconditionally and picks the result with cmovew.
11define i16 @test__tzcnt_u16(i16 %a0) {
12; X32-LABEL: test__tzcnt_u16:
13; X32:       # BB#0:
14; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
15; X32-NEXT:    movzwl %ax, %ecx
16; X32-NEXT:    cmpl $0, %ecx
17; X32-NEXT:    jne .LBB0_1
18; X32-NEXT:  # BB#2:
19; X32-NEXT:    movw $16, %ax
20; X32-NEXT:    retl
21; X32-NEXT:  .LBB0_1:
22; X32-NEXT:    tzcntw %ax, %ax
23; X32-NEXT:    retl
24;
25; X64-LABEL: test__tzcnt_u16:
26; X64:       # BB#0:
27; X64-NEXT:    movw $16, %cx
28; X64-NEXT:    movzwl %di, %edx
29; X64-NEXT:    tzcntw %dx, %ax
30; X64-NEXT:    cmpl $0, %edx
31; X64-NEXT:    cmovew %cx, %ax
32; X64-NEXT:    retq
33  %zext = zext i16 %a0 to i32
34  %cmp = icmp ne i32 %zext, 0
35  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 true)
36  %res = select i1 %cmp, i16 %cttz, i16 16
37  ret i16 %res
38}
39
; 32-bit and-not expressed in plain IR: (%a0 xor -1) and %a1.
; Recorded output for both runs is a separate xorl $-1 followed by andl;
; no single fused instruction appears in the checks.
40define i32 @test__andn_u32(i32 %a0, i32 %a1) {
41; X32-LABEL: test__andn_u32:
42; X32:       # BB#0:
43; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
44; X32-NEXT:    xorl $-1, %eax
45; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
46; X32-NEXT:    retl
47;
48; X64-LABEL: test__andn_u32:
49; X64:       # BB#0:
50; X64-NEXT:    xorl $-1, %edi
51; X64-NEXT:    andl %esi, %edi
52; X64-NEXT:    movl %edi, %eax
53; X64-NEXT:    retq
54  %xor = xor i32 %a0, -1
55  %res = and i32 %xor, %a1
56  ret i32 %res
57}
58
; Direct call to llvm.x86.bmi.bextr.32 with %a0 as the source value and %a1
; passed through unmodified as the second (control) operand.
; Recorded output for both runs is a single bextrl instruction.
59define i32 @test__bextr_u32(i32 %a0, i32 %a1) {
60; X32-LABEL: test__bextr_u32:
61; X32:       # BB#0:
62; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
63; X32-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
64; X32-NEXT:    retl
65;
66; X64-LABEL: test__bextr_u32:
67; X64:       # BB#0:
68; X64-NEXT:    bextrl %esi, %edi, %eax
69; X64-NEXT:    retq
70  %res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
71  ret i32 %res
72}
73
; Isolate-lowest-set-bit pattern in plain IR: %a0 and (0 - %a0).
; Recorded output for both runs is the generic xorl/subl/andl sequence.
74define i32 @test__blsi_u32(i32 %a0) {
75; X32-LABEL: test__blsi_u32:
76; X32:       # BB#0:
77; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
78; X32-NEXT:    xorl %eax, %eax
79; X32-NEXT:    subl %ecx, %eax
80; X32-NEXT:    andl %ecx, %eax
81; X32-NEXT:    retl
82;
83; X64-LABEL: test__blsi_u32:
84; X64:       # BB#0:
85; X64-NEXT:    xorl %eax, %eax
86; X64-NEXT:    subl %edi, %eax
87; X64-NEXT:    andl %edi, %eax
88; X64-NEXT:    retq
89  %neg = sub i32 0, %a0
90  %res = and i32 %a0, %neg
91  ret i32 %res
92}
93
; Mask-up-to-lowest-set-bit pattern in plain IR: %a0 xor (%a0 - 1).
; Recorded output for both runs is the generic movl/subl $1/xorl sequence.
94define i32 @test__blsmsk_u32(i32 %a0) {
95; X32-LABEL: test__blsmsk_u32:
96; X32:       # BB#0:
97; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
98; X32-NEXT:    movl %ecx, %eax
99; X32-NEXT:    subl $1, %eax
100; X32-NEXT:    xorl %ecx, %eax
101; X32-NEXT:    retl
102;
103; X64-LABEL: test__blsmsk_u32:
104; X64:       # BB#0:
105; X64-NEXT:    movl %edi, %eax
106; X64-NEXT:    subl $1, %eax
107; X64-NEXT:    xorl %edi, %eax
108; X64-NEXT:    retq
109  %dec = sub i32 %a0, 1
110  %res = xor i32 %a0, %dec
111  ret i32 %res
112}
113
; Reset-lowest-set-bit pattern in plain IR: %a0 and (%a0 - 1).
; Recorded output for both runs is the generic movl/subl $1/andl sequence.
114define i32 @test__blsr_u32(i32 %a0) {
115; X32-LABEL: test__blsr_u32:
116; X32:       # BB#0:
117; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
118; X32-NEXT:    movl %ecx, %eax
119; X32-NEXT:    subl $1, %eax
120; X32-NEXT:    andl %ecx, %eax
121; X32-NEXT:    retl
122;
123; X64-LABEL: test__blsr_u32:
124; X64:       # BB#0:
125; X64-NEXT:    movl %edi, %eax
126; X64-NEXT:    subl $1, %eax
127; X64-NEXT:    andl %edi, %eax
128; X64-NEXT:    retq
129  %dec = sub i32 %a0, 1
130  %res = and i32 %a0, %dec
131  ret i32 %res
132}
133
; 32-bit trailing-zero count with an explicit zero-input guard.
; The IR tests %a0 against 0, calls llvm.cttz.i32 with its i1 flag set to
; true, and selects the cttz result for a non-zero input or 32 otherwise.
; Recorded output: the i686 run uses cmpl/jne to branch around tzcntl, while
; the x86_64 run emits tzcntl followed directly by cmovbl with no separate
; compare.
134define i32 @test__tzcnt_u32(i32 %a0) {
135; X32-LABEL: test__tzcnt_u32:
136; X32:       # BB#0:
137; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
138; X32-NEXT:    cmpl $0, %eax
139; X32-NEXT:    jne .LBB6_1
140; X32-NEXT:  # BB#2:
141; X32-NEXT:    movl $32, %eax
142; X32-NEXT:    retl
143; X32-NEXT:  .LBB6_1:
144; X32-NEXT:    tzcntl %eax, %eax
145; X32-NEXT:    retl
146;
147; X64-LABEL: test__tzcnt_u32:
148; X64:       # BB#0:
149; X64-NEXT:    movl $32, %ecx
150; X64-NEXT:    tzcntl %edi, %eax
151; X64-NEXT:    cmovbl %ecx, %eax
152; X64-NEXT:    retq
153  %cmp = icmp ne i32 %a0, 0
154  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true)
155  %res = select i1 %cmp, i32 %cttz, i32 32
156  ret i32 %res
157}
158
159;
160; Intel intrinsics
161;
162
; Intel-named 16-bit trailing-zero count; the IR body has the same shape as
; @test__tzcnt_u16: zext to i32, compare against 0, llvm.cttz.i16 with its
; i1 flag set to true, then select between the cttz result and 16.
; Recorded output: branch around tzcntw on the i686 run, tzcntw plus cmovew
; on the x86_64 run.
163define i16 @test_tzcnt_u16(i16 %a0) {
164; X32-LABEL: test_tzcnt_u16:
165; X32:       # BB#0:
166; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
167; X32-NEXT:    movzwl %ax, %ecx
168; X32-NEXT:    cmpl $0, %ecx
169; X32-NEXT:    jne .LBB7_1
170; X32-NEXT:  # BB#2:
171; X32-NEXT:    movw $16, %ax
172; X32-NEXT:    retl
173; X32-NEXT:  .LBB7_1:
174; X32-NEXT:    tzcntw %ax, %ax
175; X32-NEXT:    retl
176;
177; X64-LABEL: test_tzcnt_u16:
178; X64:       # BB#0:
179; X64-NEXT:    movw $16, %cx
180; X64-NEXT:    movzwl %di, %edx
181; X64-NEXT:    tzcntw %dx, %ax
182; X64-NEXT:    cmpl $0, %edx
183; X64-NEXT:    cmovew %cx, %ax
184; X64-NEXT:    retq
185  %zext = zext i16 %a0 to i32
186  %cmp = icmp ne i32 %zext, 0
187  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 true)
188  %res = select i1 %cmp, i16 %cttz, i16 16
189  ret i16 %res
190}
191
; Intel-named 32-bit and-not in plain IR: (%a0 xor -1) and %a1.
; Recorded output for both runs is xorl $-1 followed by andl.
192define i32 @test_andn_u32(i32 %a0, i32 %a1) {
193; X32-LABEL: test_andn_u32:
194; X32:       # BB#0:
195; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
196; X32-NEXT:    xorl $-1, %eax
197; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
198; X32-NEXT:    retl
199;
200; X64-LABEL: test_andn_u32:
201; X64:       # BB#0:
202; X64-NEXT:    xorl $-1, %edi
203; X64-NEXT:    andl %esi, %edi
204; X64-NEXT:    movl %edi, %eax
205; X64-NEXT:    retq
206  %xor = xor i32 %a0, -1
207  %res = and i32 %xor, %a1
208  ret i32 %res
209}
210
; Three-operand bit-field extract: the control word passed to
; llvm.x86.bmi.bextr.32 is assembled in IR as
;   (%a1 and 255) or ((%a2 and 255) shl 8)
; i.e. the low byte of %a1 lands in bits 7:0 and the low byte of %a2 in
; bits 15:8 (presumably start and length respectively -- confirm against
; the BEXTR instruction definition).
; Recorded output keeps the andl/andl/shll/orl packing ahead of bextrl.
211define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) {
212; X32-LABEL: test_bextr_u32:
213; X32:       # BB#0:
214; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
215; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
216; X32-NEXT:    andl $255, %ecx
217; X32-NEXT:    andl $255, %eax
218; X32-NEXT:    shll $8, %eax
219; X32-NEXT:    orl %ecx, %eax
220; X32-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
221; X32-NEXT:    retl
222;
223; X64-LABEL: test_bextr_u32:
224; X64:       # BB#0:
225; X64-NEXT:    andl $255, %esi
226; X64-NEXT:    andl $255, %edx
227; X64-NEXT:    shll $8, %edx
228; X64-NEXT:    orl %esi, %edx
229; X64-NEXT:    bextrl %edx, %edi, %eax
230; X64-NEXT:    retq
231  %and1 = and i32 %a1, 255
232  %and2 = and i32 %a2, 255
233  %shl = shl i32 %and2, 8
234  %or = or i32 %and1, %shl
235  %res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %or)
236  ret i32 %res
237}
238
; Intel-named isolate-lowest-set-bit pattern: %a0 and (0 - %a0).
; Recorded output for both runs is the generic xorl/subl/andl sequence.
239define i32 @test_blsi_u32(i32 %a0) {
240; X32-LABEL: test_blsi_u32:
241; X32:       # BB#0:
242; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
243; X32-NEXT:    xorl %eax, %eax
244; X32-NEXT:    subl %ecx, %eax
245; X32-NEXT:    andl %ecx, %eax
246; X32-NEXT:    retl
247;
248; X64-LABEL: test_blsi_u32:
249; X64:       # BB#0:
250; X64-NEXT:    xorl %eax, %eax
251; X64-NEXT:    subl %edi, %eax
252; X64-NEXT:    andl %edi, %eax
253; X64-NEXT:    retq
254  %neg = sub i32 0, %a0
255  %res = and i32 %a0, %neg
256  ret i32 %res
257}
258
; Intel-named mask-up-to-lowest-set-bit pattern: %a0 xor (%a0 - 1).
; Recorded output for both runs is the generic movl/subl $1/xorl sequence.
259define i32 @test_blsmsk_u32(i32 %a0) {
260; X32-LABEL: test_blsmsk_u32:
261; X32:       # BB#0:
262; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
263; X32-NEXT:    movl %ecx, %eax
264; X32-NEXT:    subl $1, %eax
265; X32-NEXT:    xorl %ecx, %eax
266; X32-NEXT:    retl
267;
268; X64-LABEL: test_blsmsk_u32:
269; X64:       # BB#0:
270; X64-NEXT:    movl %edi, %eax
271; X64-NEXT:    subl $1, %eax
272; X64-NEXT:    xorl %edi, %eax
273; X64-NEXT:    retq
274  %dec = sub i32 %a0, 1
275  %res = xor i32 %a0, %dec
276  ret i32 %res
277}
278
; Intel-named reset-lowest-set-bit pattern: %a0 and (%a0 - 1).
; Recorded output for both runs is the generic movl/subl $1/andl sequence.
279define i32 @test_blsr_u32(i32 %a0) {
280; X32-LABEL: test_blsr_u32:
281; X32:       # BB#0:
282; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
283; X32-NEXT:    movl %ecx, %eax
284; X32-NEXT:    subl $1, %eax
285; X32-NEXT:    andl %ecx, %eax
286; X32-NEXT:    retl
287;
288; X64-LABEL: test_blsr_u32:
289; X64:       # BB#0:
290; X64-NEXT:    movl %edi, %eax
291; X64-NEXT:    subl $1, %eax
292; X64-NEXT:    andl %edi, %eax
293; X64-NEXT:    retq
294  %dec = sub i32 %a0, 1
295  %res = and i32 %a0, %dec
296  ret i32 %res
297}
298
; Intel-named 32-bit trailing-zero count with an explicit zero-input guard:
; compare %a0 against 0, call llvm.cttz.i32 with its i1 flag set to true,
; then select the cttz result for a non-zero input or 32 otherwise.
; Recorded output: cmpl/jne branch around tzcntl on the i686 run; tzcntl
; followed directly by cmovbl on the x86_64 run.
299define i32 @test_tzcnt_u32(i32 %a0) {
300; X32-LABEL: test_tzcnt_u32:
301; X32:       # BB#0:
302; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
303; X32-NEXT:    cmpl $0, %eax
304; X32-NEXT:    jne .LBB13_1
305; X32-NEXT:  # BB#2:
306; X32-NEXT:    movl $32, %eax
307; X32-NEXT:    retl
308; X32-NEXT:  .LBB13_1:
309; X32-NEXT:    tzcntl %eax, %eax
310; X32-NEXT:    retl
311;
312; X64-LABEL: test_tzcnt_u32:
313; X64:       # BB#0:
314; X64-NEXT:    movl $32, %ecx
315; X64-NEXT:    tzcntl %edi, %eax
316; X64-NEXT:    cmovbl %ecx, %eax
317; X64-NEXT:    retq
318  %cmp = icmp ne i32 %a0, 0
319  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true)
320  %res = select i1 %cmp, i32 %cttz, i32 32
321  ret i32 %res
322}
323
; Declarations for the intrinsics called by the tests in this file: the
; generic count-trailing-zeros intrinsic at 16 and 32 bits (value, i1 flag)
; and the X86 BMI 32-bit bit-field extract (source, control).
324declare i16 @llvm.cttz.i16(i16, i1)
325declare i32 @llvm.cttz.i32(i32, i1)
326declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
327