; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Funnel-shift intrinsic declarations exercised below. When the first two
; operands of fshl/fshr match, the operation is a rotate.

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

; i8 has no native rotate; the CHECK lines show an ubfx/bfi expansion.
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx w8, w0, #5, #3
; CHECK-NEXT:    bfi w8, w0, #3, #29
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
28
; Legal-width rotate-left by 3 lowers to a single ror by 64-3 = 61.
define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK-LABEL: rotl_i64_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, #61
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}
37
; When first 2 operands match, it's a rotate (by variable amount).

; i16 has no native rotate; expanded to lsl/lsr with the amount masked to
; 4 bits (#0xf) and the source zero-extended (#0xffff) for the right shift.
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w10, wzr, #0x10
; CHECK-NEXT:    sub w10, w10, w1
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    and w9, w1, #0xf
; CHECK-NEXT:    and w10, w10, #0xf
; CHECK-NEXT:    lsl w9, w0, w9
; CHECK-NEXT:    lsr w8, w8, w10
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
55
; Variable rotate-left becomes ror by the negated amount (32 - z).
define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w8, wzr, #0x20
; CHECK-NEXT:    sub w8, w8, w1
; CHECK-NEXT:    ror w0, w0, w8
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}
66
; NOTE(review): unlike rotl_i32 this currently emits lsl/lsr/orr rather
; than a single ror — possible missed combine captured by these checks.
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w9, wzr, #0x40
; CHECK-NEXT:    sub w9, w9, w1
; CHECK-NEXT:    lsl x8, x0, x1
; CHECK-NEXT:    lsr x9, x0, x9
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
79
; Vector rotate.

; No vector rotate instruction: expanded with ushl by the masked amount and
; ushl by the negated complement, then orr of the two halves.
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotl_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    movi v3.4s, #32
; CHECK-NEXT:    and v4.16b, v1.16b, v2.16b
; CHECK-NEXT:    sub v1.4s, v3.4s, v1.4s
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    neg v1.4s, v1.4s
; CHECK-NEXT:    ushl v3.4s, v0.4s, v4.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
98
; Vector rotate by constant splat amount.

; Splat-constant amount folds to immediate shl #3 / ushr #29 plus orr.
define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v1.4s, v0.4s, #29
; CHECK-NEXT:    shl v0.4s, v0.4s, #3
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
111
; Repeat everything for funnel shift right.

; When first 2 operands match, it's a rotate.

; i8 has no native rotate; the CHECK lines show an ubfx/bfi expansion.
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx w8, w0, #3, #5
; CHECK-NEXT:    bfi w8, w0, #5, #27
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
126
; Rotate-right by constant maps directly to ror with the same immediate.
define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, #3
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}
135
; When first 2 operands match, it's a rotate (by variable amount).

; i16 has no native rotate; expanded to lsr/lsl with the amount masked to
; 4 bits (#0xf) and the source zero-extended (#0xffff) for the right shift.
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    and w9, w1, #0xf
; CHECK-NEXT:    orr w10, wzr, #0x10
; CHECK-NEXT:    lsr w8, w8, w9
; CHECK-NEXT:    sub w9, w10, w1
; CHECK-NEXT:    and w9, w9, #0xf
; CHECK-NEXT:    lsl w9, w0, w9
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
153
; Variable rotate-right maps directly to ror by the register amount.
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, w1
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}
162
; 64-bit variable rotate-right maps directly to ror by the register amount.
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, x1
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
171
; Vector rotate.

; No vector rotate instruction: rotate-right is expanded as ushl by the
; negated masked amount and ushl by the masked complement, then orr.
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotr_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    movi v3.4s, #32
; CHECK-NEXT:    and v4.16b, v1.16b, v2.16b
; CHECK-NEXT:    sub v1.4s, v3.4s, v1.4s
; CHECK-NEXT:    neg v3.4s, v4.4s
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ushl v2.4s, v0.4s, v3.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
190
; Vector rotate by constant splat amount.

; Splat-constant amount folds to immediate ushr #3 / shl #29 plus orr.
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v1.4s, v0.4s, #3
; CHECK-NEXT:    shl v0.4s, v0.4s, #29
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
203
; Shift amount is taken modulo the bitwidth, so rotating by 32 is a no-op.
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}
211
; Shift amount is taken modulo the bitwidth, so rotating by 32 is a no-op.
define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}
219
; Vector splat amount equal to the bitwidth also folds away entirely.
define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
227
; Vector splat amount equal to the bitwidth also folds away entirely.
define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
235
236