; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.
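; fshl concatenates the first operand (high half) and the second (low half), shifts the pair left
; by the amount modulo the bit width, and keeps the high half, so fshl(x, x, c) == rotl(x, c).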

define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rolb $3, %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rolb $3, %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i8 @rotl_i8_const_shift1(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift1:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rolb %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rolb %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 1)
  ret i8 %f
}

define i8 @rotl_i8_const_shift7(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift7:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rorb %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rorb %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 7)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) nounwind {
; X32-SSE2-LABEL: rotl_i64_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl %ecx, %eax
; X32-SSE2-NEXT:    shldl $3, %edx, %eax
; X32-SSE2-NEXT:    shldl $3, %ecx, %edx
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i64_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    rolq $3, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotl_i16:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rolw %cl, %ax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rolw %cl, %ax
; X64-AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X32-SSE2-LABEL: rotl_i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    roll %cl, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    roll %cl, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Vector rotate.
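; With only SSE2 (no variable vector shifts), the variable rotate is lowered via a multiply:
; pslld $23 + paddd (exponent bias) + cvttps2dq build 2^amt per lane, and the pmuludq products
; hold x << amt in their low halves and x >> (32 - amt) in their high halves, which the shuffles
; and por recombine. The AVX2 version uses vpsllvd/vpsrlvd directly.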

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotl_v4i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT:    pslld $23, %xmm1
; X32-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT:    por %xmm3, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_v4i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
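; With a uniform constant amount there is no need for the multiply trick: the rotate is simply
; shift-left + shift-right + or.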

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotl_v4i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE2-NEXT:    psrld $29, %xmm1
; X32-SSE2-NEXT:    pslld $3, %xmm0
; X32-SSE2-NEXT:    por %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_v4i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrld $29, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpslld $3, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.
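; fshr keeps the low half of the shifted concatenation instead, so fshr(x, x, c) == rotr(x, c).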

define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rorb $3, %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rorb $3, %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i8 @rotr_i8_const_shift1(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift1:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rorb %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rorb %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 1)
  ret i8 %f
}

define i8 @rotr_i8_const_shift7(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift7:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT:    rolb %al
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rolb %al
; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 7)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rorl $3, %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    rorl $3, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotr_i16:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    rorw %cl, %ax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rorw %cl, %ax
; X64-AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    retq
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pushl %ebp
; X32-SSE2-NEXT:    pushl %ebx
; X32-SSE2-NEXT:    pushl %edi
; X32-SSE2-NEXT:    pushl %esi
; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT:    movl %edx, %edi
; X32-SSE2-NEXT:    shrl %cl, %edi
; X32-SSE2-NEXT:    movl %esi, %ebx
; X32-SSE2-NEXT:    shrdl %cl, %edx, %ebx
; X32-SSE2-NEXT:    xorl %ebp, %ebp
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %edi, %ebx
; X32-SSE2-NEXT:    cmovnel %ebp, %edi
; X32-SSE2-NEXT:    negb %cl
; X32-SSE2-NEXT:    movl %esi, %eax
; X32-SSE2-NEXT:    shll %cl, %eax
; X32-SSE2-NEXT:    shldl %cl, %esi, %edx
; X32-SSE2-NEXT:    testb $32, %cl
; X32-SSE2-NEXT:    cmovnel %eax, %edx
; X32-SSE2-NEXT:    cmovnel %ebp, %eax
; X32-SSE2-NEXT:    orl %ebx, %eax
; X32-SSE2-NEXT:    orl %edi, %edx
; X32-SSE2-NEXT:    popl %esi
; X32-SSE2-NEXT:    popl %edi
; X32-SSE2-NEXT:    popl %ebx
; X32-SSE2-NEXT:    popl %ebp
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rsi, %rcx
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    rorq %cl, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotr_v4i32:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    pxor %xmm2, %xmm2
; X32-SSE2-NEXT:    psubd %xmm1, %xmm2
; X32-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
; X32-SSE2-NEXT:    pslld $23, %xmm2
; X32-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm2
; X32-SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT:    por %xmm3, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_v4i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; X64-AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotr_v4i32_const_shift:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE2-NEXT:    psrld $3, %xmm1
; X32-SSE2-NEXT:    pslld $29, %xmm0
; X32-SSE2-NEXT:    por %xmm1, %xmm0
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_v4i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrld $3, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpslld $29, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

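; A shift amount equal to the bit width is taken modulo the bit width (i.e. 0), so these are no-ops.
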
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotl_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotl_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_shift_by_bitwidth:
; X32-SSE2:       # %bb.0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: rotr_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotl_v4i32_shift_by_bitwidth:
; ANY:       # %bb.0:
; ANY-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotr_v4i32_shift_by_bitwidth:
; ANY:       # %bb.0:
; ANY-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Non-power-of-2 types can't use the negated shift amount to avoid a select.
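; (For a power-of-2 width, (width - amt) mod width is just (-amt) & (width - 1), so the amt == 0
; case costs nothing; there is no equivalent mask for i7.)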

declare i7 @llvm.fshl.i7(i7, i7, i7)
declare i7 @llvm.fshr.i7(i7, i7, i7)

; extract(concat(0b1110000, 0b1110000) << 9) = 0b1000011
; Try an oversized shift to test modulo functionality.
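; 9 urem 7 == 2, so this is fshl by 2: (0b1110000 << 2 | 0b1110000 >> 5) & 0b1111111 = 0b1000011 = 67.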

define i7 @fshl_i7() {
; ANY-LABEL: fshl_i7:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $67, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 112, i7 9)
  ret i7 %f
}

; extract(concat(0b1110001, 0b1110001) >> 16) = 0b0111100
; Try an oversized shift to test modulo functionality.
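; 16 urem 7 == 2, so this is fshr by 2: (0b1110001 >> 2 | 0b1110001 << 5) & 0b1111111 = 0b0111100 = 60.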

define i7 @fshr_i7() {
; ANY-LABEL: fshr_i7:
; ANY:       # %bb.0:
; ANY-NEXT:    movb $60, %al
; ANY-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 113, i7 113, i7 16)
  ret i7 %f
}