; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
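; Each triple is covered twice: the default (FAST) runs may use the SHLD/SHRD
; double-shift instructions, while the +slow-shld (SLOW) runs are expected to
; expand funnel shifts into plain shift/or sequences instead.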

declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone

;
; Variable Funnel Shift
;
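; fshl(x, y, z) concatenates x (most significant half) with y, shifts the
; double-width value left by z modulo the bit width, and returns the most
; significant half. For the narrow i8/i16 cases below the variable amount is
; masked explicitly (andb $7 / andb $15); for i32/i64 the hardware shift-count
; masking already matches the modulo semantics.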

define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    andb $7, %cl
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    movb %ah, %al
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    andb $7, %cl
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)
  ret i8 %tmp
}

define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
; X86-FAST-LABEL: var_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT:    andb $15, %cl
; X86-FAST-NEXT:    shldw %cl, %dx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    shll $16, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    andb $15, %cl
; X86-SLOW-NEXT:    shll %cl, %eax
; X86-SLOW-NEXT:    shrl $16, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    andb $15, %cl
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shldw %cl, %si, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    shll $16, %edi
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    andb $15, %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shll %cl, %eax
; X64-SLOW-NEXT:    shrl $16, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %tmp
}

define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldl %cl, %edx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    notb %cl
; X86-SLOW-NEXT:    shrl %eax
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shldl %cl, %esi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    movl %esi, %eax
; X64-SLOW-NEXT:    shll %cl, %edi
; X64-SLOW-NEXT:    shrl %eax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shrl %cl, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

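; When optimizing for size (optsize here, and the profile-guided variant
; below), the compact shld form is expected even on +slow-shld targets, so the
; FAST and SLOW runs share the same checks.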
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-LABEL: var_shift_i32_optsize:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

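; On i686 there is no 64-bit double-shift instruction, so the i64 funnel shift
; is split into 32-bit halves: each half is shifted, and testb $32 on the
; shift amount selects which partial results feed the final or.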
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %ebx
; X86-FAST-NEXT:    pushl %edi
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X86-FAST-NEXT:    movb %ch, %cl
; X86-FAST-NEXT:    notb %cl
; X86-FAST-NEXT:    shrdl $1, %edi, %esi
; X86-FAST-NEXT:    shrl %edi
; X86-FAST-NEXT:    shrdl %cl, %edi, %esi
; X86-FAST-NEXT:    shrl %cl, %edi
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    je .LBB5_2
; X86-FAST-NEXT:  # %bb.1:
; X86-FAST-NEXT:    movl %edi, %esi
; X86-FAST-NEXT:    xorl %edi, %edi
; X86-FAST-NEXT:  .LBB5_2:
; X86-FAST-NEXT:    movl %ebx, %eax
; X86-FAST-NEXT:    movb %ch, %cl
; X86-FAST-NEXT:    shll %cl, %eax
; X86-FAST-NEXT:    shldl %cl, %ebx, %edx
; X86-FAST-NEXT:    testb $32, %ch
; X86-FAST-NEXT:    je .LBB5_4
; X86-FAST-NEXT:  # %bb.3:
; X86-FAST-NEXT:    movl %eax, %edx
; X86-FAST-NEXT:    xorl %eax, %eax
; X86-FAST-NEXT:  .LBB5_4:
; X86-FAST-NEXT:    orl %edi, %edx
; X86-FAST-NEXT:    orl %esi, %eax
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    popl %edi
; X86-FAST-NEXT:    popl %ebx
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %ebp
; X86-SLOW-NEXT:    pushl %ebx
; X86-SLOW-NEXT:    pushl %edi
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    shrl %eax
; X86-SLOW-NEXT:    movl %esi, %edi
; X86-SLOW-NEXT:    shll $31, %edi
; X86-SLOW-NEXT:    orl %eax, %edi
; X86-SLOW-NEXT:    movl %ecx, %eax
; X86-SLOW-NEXT:    movb %cl, %ch
; X86-SLOW-NEXT:    notb %ch
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %edi
; X86-SLOW-NEXT:    shrl %esi
; X86-SLOW-NEXT:    leal (%esi,%esi), %ebp
; X86-SLOW-NEXT:    movb %al, %cl
; X86-SLOW-NEXT:    shll %cl, %ebp
; X86-SLOW-NEXT:    shll %cl, %ebx
; X86-SLOW-NEXT:    movl %edx, %eax
; X86-SLOW-NEXT:    shrl %eax
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    testb $32, {{[0-9]+}}(%esp)
; X86-SLOW-NEXT:    jne .LBB5_1
; X86-SLOW-NEXT:  # %bb.2:
; X86-SLOW-NEXT:    orl %eax, %ebx
; X86-SLOW-NEXT:    jmp .LBB5_3
; X86-SLOW-NEXT:  .LBB5_1:
; X86-SLOW-NEXT:    movl %edx, %ebx
; X86-SLOW-NEXT:    xorl %edx, %edx
; X86-SLOW-NEXT:  .LBB5_3:
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %esi
; X86-SLOW-NEXT:    testb $32, %ch
; X86-SLOW-NEXT:    jne .LBB5_4
; X86-SLOW-NEXT:  # %bb.5:
; X86-SLOW-NEXT:    orl %edi, %ebp
; X86-SLOW-NEXT:    jmp .LBB5_6
; X86-SLOW-NEXT:  .LBB5_4:
; X86-SLOW-NEXT:    movl %esi, %ebp
; X86-SLOW-NEXT:    xorl %esi, %esi
; X86-SLOW-NEXT:  .LBB5_6:
; X86-SLOW-NEXT:    orl %ebp, %edx
; X86-SLOW-NEXT:    orl %esi, %ebx
; X86-SLOW-NEXT:    movl %edx, %eax
; X86-SLOW-NEXT:    movl %ebx, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    popl %edi
; X86-SLOW-NEXT:    popl %ebx
; X86-SLOW-NEXT:    popl %ebp
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdx, %rcx
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-FAST-NEXT:    shldq %cl, %rsi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movq %rdx, %rcx
; X64-SLOW-NEXT:    movq %rsi, %rax
; X64-SLOW-NEXT:    shlq %cl, %rdi
; X64-SLOW-NEXT:    shrq %rax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT:    shrq %cl, %rax
; X64-SLOW-NEXT:    orq %rdi, %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %tmp
}

;
; Const Funnel Shift
;
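; With a constant amount no masking or half-selection is needed: FAST runs fold
; to an immediate shld/shrd, SLOW runs to a shift+shift+or (or lea) sequence,
; e.g. fshl.i8(x, y, 7) == (x << 7) | (y >> 1).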

define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shrb %cl
; X86-NEXT:    shlb $7, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: const_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shrb %sil
; X64-NEXT:    shlb $7, %dil
; X64-NEXT:    leal (%rdi,%rsi), %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7)
  ret i8 %tmp
}

define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
; X86-FAST-LABEL: const_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldw $7, %cx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $9, %ecx
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    shldw $7, %si, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    shll $7, %edi
; X64-SLOW-NEXT:    shrl $9, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
  ret i16 %tmp
}

define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
; X86-FAST-LABEL: const_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldl $7, %ecx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $25, %ecx
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    shldl $7, %esi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT:    shrl $25, %esi
; X64-SLOW-NEXT:    shll $7, %edi
; X64-SLOW-NEXT:    leal (%rdi,%rsi), %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  ret i32 %tmp
}

define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-FAST-LABEL: const_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    shrdl $25, %ecx, %eax
; X86-FAST-NEXT:    shldl $7, %ecx, %edx
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    shrl $25, %esi
; X86-SLOW-NEXT:    movl %ecx, %eax
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %esi, %eax
; X86-SLOW-NEXT:    shrl $25, %ecx
; X86-SLOW-NEXT:    shll $7, %edx
; X86-SLOW-NEXT:    orl %ecx, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    shldq $7, %rsi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    shrq $57, %rsi
; X64-SLOW-NEXT:    shlq $7, %rdi
; X64-SLOW-NEXT:    leaq (%rdi,%rsi), %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
  ret i64 %tmp
}

;
; Combine Consecutive Loads
;
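; When both operands are loads from consecutive memory and the amount is a
; whole number of bytes, the funnel shift is expected to fold into a single
; little-endian load at a byte offset (e.g. the i32 case becomes a movl from
; offset 11).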

define i8 @combine_fshl_load_i8(i8* %p) nounwind {
; X86-LABEL: combine_fshl_load_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb 1(%eax), %al
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb 1(%rdi), %al
; X64-NEXT:    retq
  %p1 = getelementptr i8, i8* %p, i32 1
  %ld0 = load i8, i8* %p
  %ld1 = load i8, i8* %p1
  %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8)
  ret i8 %res
}

define i16 @combine_fshl_load_i16(i16* %p) nounwind {
; X86-LABEL: combine_fshl_load_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl 1(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl 1(%rdi), %eax
; X64-NEXT:    retq
  %p0 = getelementptr i16, i16* %p, i32 0
  %p1 = getelementptr i16, i16* %p, i32 1
  %ld0 = load i16, i16* %p0
  %ld1 = load i16, i16* %p1
  %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8)
  ret i16 %res
}

define i32 @combine_fshl_load_i32(i32* %p) nounwind {
; X86-LABEL: combine_fshl_load_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl 11(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl 11(%rdi), %eax
; X64-NEXT:    retq
  %p0 = getelementptr i32, i32* %p, i32 2
  %p1 = getelementptr i32, i32* %p, i32 3
  %ld0 = load i32, i32* %p0
  %ld1 = load i32, i32* %p1
  %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8)
  ret i32 %res
}

define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; X86-LABEL: combine_fshl_load_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl 13(%ecx), %eax
; X86-NEXT:    movl 17(%ecx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq 13(%rdi), %rax
; X64-NEXT:    retq
  %p0 = getelementptr i64, i64* %p, i64 1
  %p1 = getelementptr i64, i64* %p, i64 2
  %ld0 = load i64, i64* %p0
  %ld1 = load i64, i64* %p1
  %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24)
  ret i64 %res
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}