1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; Check that a division is bypassed only when appropriate.
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom       < %s | FileCheck -check-prefixes=CHECK,ATOM %s
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64     < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
7; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
8
9; Verify that div32 is bypassed only for Atoms.
; Signed 32-bit division with no size attributes.
; Expected shape of the Atom bypass: OR the two operands, test the result
; against $-256 (any bit above the low 8), and take the 8-bit divb path when
; none is set. The REST and HUGEWS assertions expect a lone idivl.
10define i32 @div32(i32 %a, i32 %b) {
11; ATOM-LABEL: div32:
12; ATOM:       # %bb.0: # %entry
13; ATOM-NEXT:    movl %edi, %eax
14; ATOM-NEXT:    orl %esi, %eax
15; ATOM-NEXT:    testl $-256, %eax
16; ATOM-NEXT:    je .LBB0_1
17; ATOM-NEXT:  # %bb.2:
18; ATOM-NEXT:    movl %edi, %eax
19; ATOM-NEXT:    cltd
20; ATOM-NEXT:    idivl %esi
21; ATOM-NEXT:    retq
22; ATOM-NEXT:  .LBB0_1:
23; ATOM-NEXT:    movzbl %dil, %eax
24; ATOM-NEXT:    divb %sil
25; ATOM-NEXT:    movzbl %al, %eax
26; ATOM-NEXT:    retq
27;
28; REST-LABEL: div32:
29; REST:       # %bb.0: # %entry
30; REST-NEXT:    movl %edi, %eax
31; REST-NEXT:    cltd
32; REST-NEXT:    idivl %esi
33; REST-NEXT:    retq
34;
35; HUGEWS-LABEL: div32:
36; HUGEWS:       # %bb.0: # %entry
37; HUGEWS-NEXT:    movl %edi, %eax
38; HUGEWS-NEXT:    cltd
39; HUGEWS-NEXT:    idivl %esi
40; HUGEWS-NEXT:    retq
41entry:
42  %div = sdiv i32 %a, %b
43  ret i32 %div
44}
45
46; Verify that div64 is bypassed on every tested CPU, except in the huge-working-set run (HUGEWS).
; Signed 64-bit division with no size attributes.
; Expected bypass shape: OR the two operands, shift the result right by 32 to
; test the upper halves, and use the 32-bit divl path when they are all zero;
; otherwise fall through to idivq. The HUGEWS assertions expect only the idivq.
47define i64 @div64(i64 %a, i64 %b) {
48; ATOM-LABEL: div64:
49; ATOM:       # %bb.0: # %entry
50; ATOM-NEXT:    movq %rdi, %rcx
51; ATOM-NEXT:    movq %rdi, %rax
52; ATOM-NEXT:    orq %rsi, %rcx
53; ATOM-NEXT:    shrq $32, %rcx
54; ATOM-NEXT:    je .LBB1_1
55; ATOM-NEXT:  # %bb.2:
56; ATOM-NEXT:    cqto
57; ATOM-NEXT:    idivq %rsi
58; ATOM-NEXT:    retq
59; ATOM-NEXT:  .LBB1_1:
60; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
61; ATOM-NEXT:    xorl %edx, %edx
62; ATOM-NEXT:    divl %esi
63; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
64; ATOM-NEXT:    retq
65;
66; X64-LABEL: div64:
67; X64:       # %bb.0: # %entry
68; X64-NEXT:    movq %rdi, %rax
69; X64-NEXT:    movq %rdi, %rcx
70; X64-NEXT:    orq %rsi, %rcx
71; X64-NEXT:    shrq $32, %rcx
72; X64-NEXT:    je .LBB1_1
73; X64-NEXT:  # %bb.2:
74; X64-NEXT:    cqto
75; X64-NEXT:    idivq %rsi
76; X64-NEXT:    retq
77; X64-NEXT:  .LBB1_1:
78; X64-NEXT:    # kill: def $eax killed $eax killed $rax
79; X64-NEXT:    xorl %edx, %edx
80; X64-NEXT:    divl %esi
81; X64-NEXT:    # kill: def $eax killed $eax def $rax
82; X64-NEXT:    retq
83;
84; SLM-LABEL: div64:
85; SLM:       # %bb.0: # %entry
86; SLM-NEXT:    movq %rdi, %rcx
87; SLM-NEXT:    movq %rdi, %rax
88; SLM-NEXT:    orq %rsi, %rcx
89; SLM-NEXT:    shrq $32, %rcx
90; SLM-NEXT:    je .LBB1_1
91; SLM-NEXT:  # %bb.2:
92; SLM-NEXT:    cqto
93; SLM-NEXT:    idivq %rsi
94; SLM-NEXT:    retq
95; SLM-NEXT:  .LBB1_1:
96; SLM-NEXT:    xorl %edx, %edx
97; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
98; SLM-NEXT:    divl %esi
99; SLM-NEXT:    # kill: def $eax killed $eax def $rax
100; SLM-NEXT:    retq
101;
102; SKL-LABEL: div64:
103; SKL:       # %bb.0: # %entry
104; SKL-NEXT:    movq %rdi, %rax
105; SKL-NEXT:    movq %rdi, %rcx
106; SKL-NEXT:    orq %rsi, %rcx
107; SKL-NEXT:    shrq $32, %rcx
108; SKL-NEXT:    je .LBB1_1
109; SKL-NEXT:  # %bb.2:
110; SKL-NEXT:    cqto
111; SKL-NEXT:    idivq %rsi
112; SKL-NEXT:    retq
113; SKL-NEXT:  .LBB1_1:
114; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
115; SKL-NEXT:    xorl %edx, %edx
116; SKL-NEXT:    divl %esi
117; SKL-NEXT:    # kill: def $eax killed $eax def $rax
118; SKL-NEXT:    retq
119;
120; HUGEWS-LABEL: div64:
121; HUGEWS:       # %bb.0: # %entry
122; HUGEWS-NEXT:    movq %rdi, %rax
123; HUGEWS-NEXT:    cqto
124; HUGEWS-NEXT:    idivq %rsi
125; HUGEWS-NEXT:    retq
126entry:
127  %div = sdiv i64 %a, %b
128  ret i64 %div
129}
130
131
132; Verify that no extra code is generated when optimizing for size.
133
; Signed 64-bit division in a function marked optsize. Every run expects a
; lone idivq, with no operand test and no divl fast path.
134define i64 @div64_optsize(i64 %a, i64 %b) optsize {
135; CHECK-LABEL: div64_optsize:
136; CHECK:       # %bb.0:
137; CHECK-NEXT:    movq %rdi, %rax
138; CHECK-NEXT:    cqto
139; CHECK-NEXT:    idivq %rsi
140; CHECK-NEXT:    retq
141;
142; HUGEWS-LABEL: div64_optsize:
143; HUGEWS:       # %bb.0:
144; HUGEWS-NEXT:    movq %rdi, %rax
145; HUGEWS-NEXT:    cqto
146; HUGEWS-NEXT:    idivq %rsi
147; HUGEWS-NEXT:    retq
148  %div = sdiv i64 %a, %b
149  ret i64 %div
150}
151
; Signed 64-bit division in a function tagged !prof !15, which this file
; defines as function_entry_count 0. Presumably that makes the function cold
; under the module's ProfileSummary so it is optimized for size (TODO confirm).
; Every run expects a lone idivq with no bypass branch.
152define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
153; CHECK-LABEL: div64_pgso:
154; CHECK:       # %bb.0:
155; CHECK-NEXT:    movq %rdi, %rax
156; CHECK-NEXT:    cqto
157; CHECK-NEXT:    idivq %rsi
158; CHECK-NEXT:    retq
159;
160; HUGEWS-LABEL: div64_pgso:
161; HUGEWS:       # %bb.0:
162; HUGEWS-NEXT:    movq %rdi, %rax
163; HUGEWS-NEXT:    cqto
164; HUGEWS-NEXT:    idivq %rsi
165; HUGEWS-NEXT:    retq
166  %div = sdiv i64 %a, %b
167  ret i64 %div
168}
169
; Same sdiv i64 as div64, with no attributes and no profile metadata on the
; function. The ATOM, X64, SLM and SKL assertions expect the 32-bit divl
; bypass; the HUGEWS assertions, which belong to the llc invocation passing
; -profile-summary-huge-working-set-size-threshold=1, expect a lone idivq.
170define i64 @div64_hugews(i64 %a, i64 %b) {
171; ATOM-LABEL: div64_hugews:
172; ATOM:       # %bb.0:
173; ATOM-NEXT:    movq %rdi, %rcx
174; ATOM-NEXT:    movq %rdi, %rax
175; ATOM-NEXT:    orq %rsi, %rcx
176; ATOM-NEXT:    shrq $32, %rcx
177; ATOM-NEXT:    je .LBB4_1
178; ATOM-NEXT:  # %bb.2:
179; ATOM-NEXT:    cqto
180; ATOM-NEXT:    idivq %rsi
181; ATOM-NEXT:    retq
182; ATOM-NEXT:  .LBB4_1:
183; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
184; ATOM-NEXT:    xorl %edx, %edx
185; ATOM-NEXT:    divl %esi
186; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
187; ATOM-NEXT:    retq
188;
189; X64-LABEL: div64_hugews:
190; X64:       # %bb.0:
191; X64-NEXT:    movq %rdi, %rax
192; X64-NEXT:    movq %rdi, %rcx
193; X64-NEXT:    orq %rsi, %rcx
194; X64-NEXT:    shrq $32, %rcx
195; X64-NEXT:    je .LBB4_1
196; X64-NEXT:  # %bb.2:
197; X64-NEXT:    cqto
198; X64-NEXT:    idivq %rsi
199; X64-NEXT:    retq
200; X64-NEXT:  .LBB4_1:
201; X64-NEXT:    # kill: def $eax killed $eax killed $rax
202; X64-NEXT:    xorl %edx, %edx
203; X64-NEXT:    divl %esi
204; X64-NEXT:    # kill: def $eax killed $eax def $rax
205; X64-NEXT:    retq
206;
207; SLM-LABEL: div64_hugews:
208; SLM:       # %bb.0:
209; SLM-NEXT:    movq %rdi, %rcx
210; SLM-NEXT:    movq %rdi, %rax
211; SLM-NEXT:    orq %rsi, %rcx
212; SLM-NEXT:    shrq $32, %rcx
213; SLM-NEXT:    je .LBB4_1
214; SLM-NEXT:  # %bb.2:
215; SLM-NEXT:    cqto
216; SLM-NEXT:    idivq %rsi
217; SLM-NEXT:    retq
218; SLM-NEXT:  .LBB4_1:
219; SLM-NEXT:    xorl %edx, %edx
220; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
221; SLM-NEXT:    divl %esi
222; SLM-NEXT:    # kill: def $eax killed $eax def $rax
223; SLM-NEXT:    retq
224;
225; SKL-LABEL: div64_hugews:
226; SKL:       # %bb.0:
227; SKL-NEXT:    movq %rdi, %rax
228; SKL-NEXT:    movq %rdi, %rcx
229; SKL-NEXT:    orq %rsi, %rcx
230; SKL-NEXT:    shrq $32, %rcx
231; SKL-NEXT:    je .LBB4_1
232; SKL-NEXT:  # %bb.2:
233; SKL-NEXT:    cqto
234; SKL-NEXT:    idivq %rsi
235; SKL-NEXT:    retq
236; SKL-NEXT:  .LBB4_1:
237; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
238; SKL-NEXT:    xorl %edx, %edx
239; SKL-NEXT:    divl %esi
240; SKL-NEXT:    # kill: def $eax killed $eax def $rax
241; SKL-NEXT:    retq
242;
243; HUGEWS-LABEL: div64_hugews:
244; HUGEWS:       # %bb.0:
245; HUGEWS-NEXT:    movq %rdi, %rax
246; HUGEWS-NEXT:    cqto
247; HUGEWS-NEXT:    idivq %rsi
248; HUGEWS-NEXT:    retq
249  %div = sdiv i64 %a, %b
250  ret i64 %div
251}
252
; Signed 32-bit division in a function marked optsize. Every run, including
; the Atom one (which also uses the shared CHECK prefix), expects a lone idivl
; with no 8-bit divb bypass.
253define i32 @div32_optsize(i32 %a, i32 %b) optsize {
254; CHECK-LABEL: div32_optsize:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    movl %edi, %eax
257; CHECK-NEXT:    cltd
258; CHECK-NEXT:    idivl %esi
259; CHECK-NEXT:    retq
260;
261; HUGEWS-LABEL: div32_optsize:
262; HUGEWS:       # %bb.0:
263; HUGEWS-NEXT:    movl %edi, %eax
264; HUGEWS-NEXT:    cltd
265; HUGEWS-NEXT:    idivl %esi
266; HUGEWS-NEXT:    retq
267  %div = sdiv i32 %a, %b
268  ret i32 %div
269}
270
; Signed 32-bit division in a function tagged !prof !15, which this file
; defines as function_entry_count 0. Presumably that makes the function cold
; under the module's ProfileSummary so it is optimized for size (TODO confirm).
; Every run, including the Atom one, expects a lone idivl with no divb bypass.
271define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
272; CHECK-LABEL: div32_pgso:
273; CHECK:       # %bb.0:
274; CHECK-NEXT:    movl %edi, %eax
275; CHECK-NEXT:    cltd
276; CHECK-NEXT:    idivl %esi
277; CHECK-NEXT:    retq
278;
279; HUGEWS-LABEL: div32_pgso:
280; HUGEWS:       # %bb.0:
281; HUGEWS-NEXT:    movl %edi, %eax
282; HUGEWS-NEXT:    cltd
283; HUGEWS-NEXT:    idivl %esi
284; HUGEWS-NEXT:    retq
285  %div = sdiv i32 %a, %b
286  ret i32 %div
287}
288
; Signed 32-bit division in a function marked minsize. Every run, including
; the Atom one (which also uses the shared CHECK prefix), expects a lone idivl
; with no 8-bit divb bypass.
289define i32 @div32_minsize(i32 %a, i32 %b) minsize {
290; CHECK-LABEL: div32_minsize:
291; CHECK:       # %bb.0:
292; CHECK-NEXT:    movl %edi, %eax
293; CHECK-NEXT:    cltd
294; CHECK-NEXT:    idivl %esi
295; CHECK-NEXT:    retq
296;
297; HUGEWS-LABEL: div32_minsize:
298; HUGEWS:       # %bb.0:
299; HUGEWS-NEXT:    movl %edi, %eax
300; HUGEWS-NEXT:    cltd
301; HUGEWS-NEXT:    idivl %esi
302; HUGEWS-NEXT:    retq
303  %div = sdiv i32 %a, %b
304  ret i32 %div
305}
306
; Module-level profile data: a ProfileSummary module flag (InstrProf format)
; with a three-entry DetailedSummary, followed by !15, the zero
; function_entry_count that div64_pgso and div32_pgso reference through !prof.
307!llvm.module.flags = !{!1}
308!1 = !{i32 1, !"ProfileSummary", !2}
309!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
310!3 = !{!"ProfileFormat", !"InstrProf"}
311!4 = !{!"TotalCount", i64 10000}
312!5 = !{!"MaxCount", i64 1000}
313!6 = !{!"MaxInternalCount", i64 1}
314!7 = !{!"MaxFunctionCount", i64 1000}
315!8 = !{!"NumCounts", i64 3}
316!9 = !{!"NumFunctions", i64 3}
317!10 = !{!"DetailedSummary", !11}
318!11 = !{!12, !13, !14}
319!12 = !{i32 10000, i64 1000, i32 1}
320!13 = !{i32 999000, i64 1000, i32 3}
321!14 = !{i32 999999, i64 5, i32 3}
322!15 = !{!"function_entry_count", i64 0}
323