; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

; Funnel-shift-left intrinsics used by the rotl_* tests below, one per
; tested type (i8, i16, i32, i64 and <4 x i32>).
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; Funnel-shift-right intrinsics used by the rotr_* tests below, same set
; of types.
declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When the first two operands match, the funnel shift is a rotate.

; i8 rotate-left by the constant 3, written as fshl(%x, %x, 3).
; All three run configurations share one expected sequence: a word rotate
; (rotlwi) combined with a rotate-and-insert (rlwimi), then a move of the
; result into r3.
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i64 rotate-left by the constant 3, written as fshl(%x, %x, 3).
; Both ppc32 configurations expect a rotlwi/rlwimi pair for each of the
; two result registers (r3 and r4); powerpc64le expects a single rotldi.
define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When the first two operands match, the funnel shift is a rotate (by a variable amount).

; i16 rotate-left by a variable amount, written as fshl(%x, %x, %z).
; Every configuration expects an expanded sequence, not a single rotate:
; the amount and its negation are each masked to their low 4 bits
; (clrlwi ..., 28), %x is zero-extended from 16 bits (clrlwi ..., 16) for
; the right shift, and the slw/srw results are OR-ed together. The 32-bit
; and 64-bit expectations differ only in scheduling and register choice.
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotl_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    slw 3, 3, 6
; CHECK32-NEXT:    srw 4, 5, 4
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    slw 3, 3, 4
; CHECK64-NEXT:    srw 4, 6, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; i32 rotate-left by a variable amount, written as fshl(%x, %x, %z).
; All three run configurations expect a single rotlw.
define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; i64 rotate-left by a variable amount, written as fshl(%x, %x, %z).
; powerpc64le expects a single rotld. Both ppc32 configurations expect an
; expanded shift/or sequence over r3 and r4, with the amount and its
; negation masked to their low 6 bits (clrlwi ..., 26). The default ppc32
; CPU and -mcpu=ppc64 are checked separately because the expected
; instruction order and register use differ.
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    slw 7, 3, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    srw 3, 3, 6
; CHECK32_32-NEXT:    srw 6, 4, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    slw 4, 4, 5
; CHECK32_32-NEXT:    or 3, 7, 3
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    slw 7, 3, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    slw 5, 4, 5
; CHECK32_64-NEXT:    slw 9, 4, 9
; CHECK32_64-NEXT:    srw 10, 3, 6
; CHECK32_64-NEXT:    srw 4, 4, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    srw 3, 3, 6
; CHECK32_64-NEXT:    or 4, 4, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 4, 4, 3
; CHECK32_64-NEXT:    or 3, 6, 10
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate (by a variable per-element amount).

; <4 x i32> rotate-left by a variable per-element amount, written as
; fshl(%x, %x, %z).
; The default ppc32 CPU expects four scalar rotlw instructions, one per
; element; -mcpu=ppc64 and powerpc64le each expect a single vector vrlw.
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by a constant splat amount.

; <4 x i32> rotate-left by a splat of the constant 3, written as
; fshl(%x, %x, <3, 3, 3, 3>).
; The default ppc32 CPU expects four scalar rotlwi-by-3 instructions;
; -mcpu=ppc64 and powerpc64le expect the amount to be splatted with
; vspltisw and then applied with a single vrlw.
define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat the rotate tests for funnel shift right (rotate right).

; i8 rotate-right by the constant 3, written as fshr(%x, %x, 3).
; All three run configurations share one expected sequence: a word rotate
; (rotlwi) combined with a rotate-and-insert (rlwimi), then a move of the
; result into r3.
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

; i32 rotate-right by the constant 3, written as fshr(%x, %x, 3).
; All three run configurations expect a single left rotate by 29
; (32 - 3), since the expected output uses rotlwi.
define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When the first two operands match, the funnel shift is a rotate (by a variable amount).

; i16 rotate-right by a variable amount, written as fshr(%x, %x, %z).
; Every configuration expects an expanded sequence, not a single rotate:
; the amount and its negation are each masked to their low 4 bits
; (clrlwi ..., 28), %x is zero-extended from 16 bits (clrlwi ..., 16) for
; the right shift, and the srw/slw results are OR-ed together. The 32-bit
; and 64-bit expectations differ only in scheduling and register choice.
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotr_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    srw 5, 5, 6
; CHECK32-NEXT:    slw 3, 3, 4
; CHECK32-NEXT:    or 3, 5, 3
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    srw 4, 6, 4
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    or 3, 4, 3
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

; i32 rotate-right by a variable amount, written as fshr(%x, %x, %z).
; All three run configurations expect the amount to be negated and then
; used in a left rotate (neg followed by rotlw).
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; i64 rotate-right by a variable amount, written as fshr(%x, %x, %z).
; powerpc64le expects neg followed by a single rotld. Both ppc32
; configurations expect an expanded shift/or sequence over r3 and r4,
; with the amount and its negation masked to their low 6 bits
; (clrlwi ..., 26). The default ppc32 CPU and -mcpu=ppc64 are checked
; separately because the expected instruction order and register use
; differ.
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    srw 7, 4, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    slw 4, 4, 6
; CHECK32_32-NEXT:    slw 6, 3, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    srw 3, 3, 5
; CHECK32_32-NEXT:    or 4, 7, 4
; CHECK32_32-NEXT:    or 3, 3, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    srw 7, 4, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    srw 5, 3, 5
; CHECK32_64-NEXT:    srw 9, 3, 9
; CHECK32_64-NEXT:    slw 10, 4, 6
; CHECK32_64-NEXT:    slw 3, 3, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    slw 4, 4, 6
; CHECK32_64-NEXT:    or 3, 3, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 10
; CHECK32_64-NEXT:    or 3, 5, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate (by a variable per-element amount).

; <4 x i32> rotate-right by a variable per-element amount, written as
; fshr(%x, %x, %z).
; The default ppc32 CPU expects each of the four amounts to be negated
; and used in a scalar rotlw. -mcpu=ppc64 and powerpc64le expect the
; amount vector to be subtracted from a zeroed register (vsubuwm) and
; then applied with a single vrlw; they differ only in how the zero is
; produced (vxor versus xxlxor).
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by a constant splat amount.

; <4 x i32> rotate-right by a splat of the constant 3, written as
; fshr(%x, %x, <3, 3, 3, 3>).
; The default ppc32 CPU expects four scalar left rotates by 29 (32 - 3).
; -mcpu=ppc64 and powerpc64le expect the amount 29 to be formed as
; 13 - (-16) from two vspltisw splats and a vsubuwm, then applied with a
; single vrlw.
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; i32 rotate-left by 32, i.e. by the full bit width of the type.
; All three run configurations expect the rotate to disappear entirely:
; the function body is just the return (blr).
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

; i32 rotate-right by 32, i.e. by the full bit width of the type.
; All three run configurations expect the rotate to disappear entirely:
; the function body is just the return (blr).
define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

; <4 x i32> rotate-left by a splat of 32, i.e. by the full element width.
; All three run configurations expect the rotate to disappear entirely:
; the function body is just the return (blr).
define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; <4 x i32> rotate-right by a splat of 32, i.e. by the full element width.
; All three run configurations expect the rotate to disappear entirely:
; the function body is just the return (blr).
define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
