; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
6
; Stack reload folding tests.
;
; Each test contains an inline-asm nop with side effects that clobbers the
; general-purpose registers. This forces the relevant values to be spilled, so
; we can check that the reload is correctly folded into the instruction.
11
; 32-bit ANDN: computes (~%a0) & %a1.
; The inline asm clobbers every general-purpose register except %rsp, so both
; arguments are spilled before it. Afterwards one operand is reloaded into
; %eax and the other is read by andnl straight from its stack slot (the
; "Folded Reload"). The asm result %1 is never used.
define i32 @stack_fold_andn_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_andn_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    andnl {{[-0-9]+}}(%r{{[sb]}}p), %eax, %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i32 %a0, -1
  %3 = and i32 %a1, %2
  ret i32 %3
}
58
; 64-bit ANDN: computes (~%a0) & %a1.
; Both arguments are spilled around the register-clobbering inline asm; one is
; then reloaded into %rax and the other is read by andnq straight from its
; stack slot (the "Folded Reload"). The asm result %1 is never used.
define i64 @stack_fold_andn_u64(i64 %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_andn_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    andnq {{[-0-9]+}}(%r{{[sb]}}p), %rax, %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i64 %a0, -1
  %3 = and i64 %a1, %2
  ret i64 %3
}
105
; 32-bit BEXTR through the llvm.x86.bmi.bextr.32 intrinsic.
; Both arguments are spilled around the register-clobbering inline asm; one is
; then reloaded into %eax and the other is read by bextrl straight from its
; stack slot (the "Folded Reload"). The asm result %1 is never used.
define i32 @stack_fold_bextr_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_bextr_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    bextrl %eax, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
152
; 64-bit BEXTR through the llvm.x86.bmi.bextr.64 intrinsic.
; Both arguments are spilled around the register-clobbering inline asm; one is
; then reloaded into %rax and the other is read by bextrq straight from its
; stack slot (the "Folded Reload"). The asm result %1 is never used.
define i64 @stack_fold_bextr_u64(i64 %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_bextr_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    bextrq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
  ret i64 %2
}
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
199
; 32-bit BLSI: the (0 - %a0) & %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsil is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i32 @stack_fold_blsi_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsi_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 0, %a0
  %3 = and i32 %2, %a0
  ret i32 %3
}
244
; 64-bit BLSI: the (0 - %a0) & %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsiq is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i64 @stack_fold_blsi_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsi_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsiq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 0, %a0
  %3 = and i64 %2, %a0
  ret i64 %3
}
289
; 32-bit BLSMSK: the (%a0 - 1) ^ %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsmskl is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i32 @stack_fold_blsmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsmsk_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %2, %a0
  ret i32 %3
}
334
; 64-bit BLSMSK: the (%a0 - 1) ^ %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsmskq is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i64 @stack_fold_blsmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsmsk_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %2, %a0
  ret i64 %3
}
379
; 32-bit BLSR: the (%a0 - 1) & %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsrl is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i32 @stack_fold_blsr_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsr_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsrl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = and i32 %2, %a0
  ret i32 %3
}
424
; 64-bit BLSR: the (%a0 - 1) & %a0 pattern.
; %a0 is spilled before the register-clobbering inline asm, and blsrq is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i64 @stack_fold_blsr_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsr_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsrq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = and i64 %2, %a0
  ret i64 %3
}
469
; TODO: stack_fold_tzcnt_u16
471
; 32-bit TZCNT through llvm.cttz.i32 (second argument is i1 0).
; %a0 is spilled before the register-clobbering inline asm, and tzcntl is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i32 @stack_fold_tzcnt_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_tzcnt_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    tzcntl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 0)
  ret i32 %2
}
declare i32 @llvm.cttz.i32(i32, i1)
516
; 64-bit TZCNT through llvm.cttz.i64 (second argument is i1 0).
; %a0 is spilled before the register-clobbering inline asm, and tzcntq is
; expected to read it straight from the stack slot (the "Folded Reload") with
; no separate reload instruction. The asm result %1 is never used.
define i64 @stack_fold_tzcnt_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_tzcnt_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    tzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 0)
  ret i64 %2
}
declare i64 @llvm.cttz.i64(i64, i1)
561