1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -instcombine < %s | FileCheck %s
3
4;
5; Tests for combining concat-able ops:
6; or(zext(OP(x)), shl(zext(OP(y)),bw/2))
7; -->
8; OP(or(shl(zext(x),bw/2), zext(y)))
9;
10
11; BSWAP
12
13; PR45715
; Scalar "split" case: a single i64 is split into its two i32 halves, each
; half is byte-swapped, and the results are re-joined with the swapped LOW
; half placed in the upper 32 bits. The assertions expect the whole sequence
; to become one llvm.bswap.i64 of the original argument.
14define i64 @concat_bswap32_unary_split(i64 %a0) {
15; CHECK-LABEL: @concat_bswap32_unary_split(
16; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A0:%.*]])
17; CHECK-NEXT:    ret i64 [[TMP1]]
18;
  ; %2 is the high 32 bits of %a0, %3 is the low 32 bits.
19  %1 = lshr i64 %a0, 32
20  %2 = trunc i64 %1 to i32
21  %3 = trunc i64 %a0 to i32
22  %4 = tail call i32 @llvm.bswap.i32(i32 %2)
23  %5 = tail call i32 @llvm.bswap.i32(i32 %3)
24  %6 = zext i32 %4 to i64
25  %7 = zext i32 %5 to i64
  ; bswap(low) moves to the upper half; bswap(high) stays in the lower half.
26  %8 = shl nuw i64 %7, 32
27  %9 = or i64 %6, %8
28  ret i64 %9
29}
30
; <2 x i64> variant of the "split" case: every lane is split into i32 halves
; with a splat shift of 32, each half is byte-swapped, and the swapped LOW
; half is placed in the upper 32 bits. The assertions expect a single
; llvm.bswap.v2i64 of the original argument.
31define <2 x i64> @concat_bswap32_unary_split_vector(<2 x i64> %a0) {
32; CHECK-LABEL: @concat_bswap32_unary_split_vector(
33; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[A0:%.*]])
34; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
35;
  ; %2 holds the high 32 bits of each lane, %3 the low 32 bits.
36  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
37  %2 = trunc <2 x i64> %1 to <2 x i32>
38  %3 = trunc <2 x i64> %a0 to <2 x i32>
39  %4 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
40  %5 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %3)
41  %6 = zext <2 x i32> %4 to <2 x i64>
42  %7 = zext <2 x i32> %5 to <2 x i64>
  ; bswap(low) moves to the upper half; bswap(high) stays in the lower half.
43  %8 = shl nuw <2 x i64> %7, <i64 32, i64 32>
44  %9 = or <2 x i64> %6, %8
45  ret <2 x i64> %9
46}
47
; Scalar "flip" case: same split as the "split" test, but the swapped HIGH
; half is the one shifted into the upper 32 bits, so each half stays in its
; original position. The assertions expect a funnel shift of %a0 with itself
; by 32 followed by one llvm.bswap.i64.
48define i64 @concat_bswap32_unary_flip(i64 %a0) {
49; CHECK-LABEL: @concat_bswap32_unary_flip(
50; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
51; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
52; CHECK-NEXT:    ret i64 [[TMP2]]
53;
  ; %2 is the high 32 bits of %a0, %3 is the low 32 bits.
54  %1 = lshr i64 %a0, 32
55  %2 = trunc i64 %1 to i32
56  %3 = trunc i64 %a0 to i32
57  %4 = tail call i32 @llvm.bswap.i32(i32 %2)
58  %5 = tail call i32 @llvm.bswap.i32(i32 %3)
59  %6 = zext i32 %4 to i64
60  %7 = zext i32 %5 to i64
  ; bswap(high) is shifted back into the upper half; bswap(low) stays low.
61  %8 = shl nuw i64 %6, 32
62  %9 = or i64 %7, %8
63  ret i64 %9
64}
65
; <2 x i64> variant of the "flip" case: per lane, the swapped HIGH half is
; shifted back into the upper 32 bits and the swapped LOW half stays low.
; The assertions expect llvm.fshl.v2i64 of %a0 with itself by a splat of 32,
; followed by one llvm.bswap.v2i64.
66define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) {
67; CHECK-LABEL: @concat_bswap32_unary_flip_vector(
68; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
69; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
70; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
71;
  ; %2 holds the high 32 bits of each lane, %3 the low 32 bits.
72  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
73  %2 = trunc <2 x i64> %1 to <2 x i32>
74  %3 = trunc <2 x i64> %a0 to <2 x i32>
75  %4 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
76  %5 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %3)
77  %6 = zext <2 x i32> %4 to <2 x i64>
78  %7 = zext <2 x i32> %5 to <2 x i64>
  ; bswap(high) is shifted back into the upper half; bswap(low) stays low.
79  %8 = shl nuw <2 x i64> %6, <i64 32, i64 32>
80  %9 = or <2 x i64> %7, %8
81  ret <2 x i64> %9
82}
83
; Scalar "binary" case: the two i32 inputs are independent arguments rather
; than halves of one value. bswap(%a0) lands in the lower 32 bits and
; bswap(%a1) in the upper 32 bits. The assertions expect the un-swapped
; arguments to be concatenated with their positions exchanged (%a0 shifted
; into the upper half, %a1 in the lower half) and then a single
; llvm.bswap.i64 applied to the result.
84define i64 @concat_bswap32_binary(i32 %a0, i32 %a1) {
85; CHECK-LABEL: @concat_bswap32_binary(
86; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A1:%.*]] to i64
87; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[A0:%.*]] to i64
88; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 32
89; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP3]], [[TMP1]]
90; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
91; CHECK-NEXT:    ret i64 [[TMP5]]
92;
93  %1 = tail call i32 @llvm.bswap.i32(i32 %a0)
94  %2 = tail call i32 @llvm.bswap.i32(i32 %a1)
95  %3 = zext i32 %1 to i64
96  %4 = zext i32 %2 to i64
  ; bswap(%a1) goes to the upper half; bswap(%a0) stays in the lower half.
97  %5 = shl nuw i64 %4, 32
98  %6 = or i64 %3, %5
99  ret i64 %6
100}
101
; <2 x i32> -> <2 x i64> variant of the "binary" case: per lane, bswap(%a0)
; lands in the lower 32 bits and bswap(%a1) in the upper 32 bits. The
; assertions expect the un-swapped arguments to be concatenated with their
; positions exchanged (%a0 shifted up by a splat of 32, %a1 low) and then a
; single llvm.bswap.v2i64 applied to the result.
102define <2 x i64> @concat_bswap32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) {
103; CHECK-LABEL: @concat_bswap32_binary_vector(
104; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
105; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
106; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
107; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]]
108; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP4]])
109; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
110;
111  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a0)
112  %2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a1)
113  %3 = zext <2 x i32> %1 to <2 x i64>
114  %4 = zext <2 x i32> %2 to <2 x i64>
  ; bswap(%a1) goes to the upper half; bswap(%a0) stays in the lower half.
115  %5 = shl nuw <2 x i64> %4, <i64 32, i64 32>
116  %6 = or <2 x i64> %3, %5
117  ret <2 x i64> %6
118}
119
; Declarations of the 32-bit-element byte-swap intrinsics (scalar and
; <2 x i32>) that the bswap test inputs in this file call.
120declare i32 @llvm.bswap.i32(i32)
121declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
122
123; BITREVERSE
124
; Scalar "split" case for bitreverse: a single i64 is split into its two i32
; halves, each half is bit-reversed, and the reversed LOW half is placed in
; the upper 32 bits. The assertions expect the whole sequence to become one
; llvm.bitreverse.i64 of the original argument.
125define i64 @concat_bitreverse32_unary_split(i64 %a0) {
126; CHECK-LABEL: @concat_bitreverse32_unary_split(
127; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[A0:%.*]])
128; CHECK-NEXT:    ret i64 [[TMP1]]
129;
  ; %2 is the high 32 bits of %a0, %3 is the low 32 bits.
130  %1 = lshr i64 %a0, 32
131  %2 = trunc i64 %1 to i32
132  %3 = trunc i64 %a0 to i32
133  %4 = tail call i32 @llvm.bitreverse.i32(i32 %2)
134  %5 = tail call i32 @llvm.bitreverse.i32(i32 %3)
135  %6 = zext i32 %4 to i64
136  %7 = zext i32 %5 to i64
  ; bitreverse(low) moves to the upper half; bitreverse(high) stays low.
137  %8 = shl nuw i64 %7, 32
138  %9 = or i64 %6, %8
139  ret i64 %9
140}
141
; <2 x i64> variant of the bitreverse "split" case: every lane is split into
; i32 halves with a splat shift of 32, each half is bit-reversed, and the
; reversed LOW half is placed in the upper 32 bits. The assertions expect a
; single llvm.bitreverse.v2i64 of the original argument.
142define <2 x i64> @concat_bitreverse32_unary_split_vector(<2 x i64> %a0) {
143; CHECK-LABEL: @concat_bitreverse32_unary_split_vector(
144; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[A0:%.*]])
145; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
146;
  ; %2 holds the high 32 bits of each lane, %3 the low 32 bits.
147  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
148  %2 = trunc <2 x i64> %1 to <2 x i32>
149  %3 = trunc <2 x i64> %a0 to <2 x i32>
150  %4 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %2)
151  %5 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %3)
152  %6 = zext <2 x i32> %4 to <2 x i64>
153  %7 = zext <2 x i32> %5 to <2 x i64>
  ; bitreverse(low) moves to the upper half; bitreverse(high) stays low.
154  %8 = shl nuw <2 x i64> %7, <i64 32, i64 32>
155  %9 = or <2 x i64> %6, %8
156  ret <2 x i64> %9
157}
158
; Scalar "flip" case for bitreverse: same split as the "split" test, but the
; reversed HIGH half is the one shifted into the upper 32 bits, so each half
; stays in its original position. The assertions expect a funnel shift of
; %a0 with itself by 32 followed by one llvm.bitreverse.i64.
159define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
160; CHECK-LABEL: @concat_bitreverse32_unary_flip(
161; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
162; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP1]])
163; CHECK-NEXT:    ret i64 [[TMP2]]
164;
  ; %2 is the high 32 bits of %a0, %3 is the low 32 bits.
165  %1 = lshr i64 %a0, 32
166  %2 = trunc i64 %1 to i32
167  %3 = trunc i64 %a0 to i32
168  %4 = tail call i32 @llvm.bitreverse.i32(i32 %2)
169  %5 = tail call i32 @llvm.bitreverse.i32(i32 %3)
170  %6 = zext i32 %4 to i64
171  %7 = zext i32 %5 to i64
  ; bitreverse(high) is shifted back into the upper half; bitreverse(low) stays low.
172  %8 = shl nuw i64 %6, 32
173  %9 = or i64 %7, %8
174  ret i64 %9
175}
176
; <2 x i64> variant of the bitreverse "flip" case: per lane, the reversed
; HIGH half is shifted back into the upper 32 bits and the reversed LOW half
; stays low. The assertions expect llvm.fshl.v2i64 of %a0 with itself by a
; splat of 32, followed by one llvm.bitreverse.v2i64.
177define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) {
178; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector(
179; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
180; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
181; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
182;
  ; %2 holds the high 32 bits of each lane, %3 the low 32 bits.
183  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
184  %2 = trunc <2 x i64> %1 to <2 x i32>
185  %3 = trunc <2 x i64> %a0 to <2 x i32>
186  %4 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %2)
187  %5 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %3)
188  %6 = zext <2 x i32> %4 to <2 x i64>
189  %7 = zext <2 x i32> %5 to <2 x i64>
  ; bitreverse(high) is shifted back into the upper half; bitreverse(low) stays low.
190  %8 = shl nuw <2 x i64> %6, <i64 32, i64 32>
191  %9 = or <2 x i64> %7, %8
192  ret <2 x i64> %9
193}
194
; Scalar "binary" case for bitreverse: the two i32 inputs are independent
; arguments rather than halves of one value. bitreverse(%a0) lands in the
; lower 32 bits and bitreverse(%a1) in the upper 32 bits. The assertions
; expect the un-reversed arguments to be concatenated with their positions
; exchanged (%a0 shifted into the upper half, %a1 in the lower half) and
; then a single llvm.bitreverse.i64 applied to the result.
195define i64 @concat_bitreverse32_binary(i32 %a0, i32 %a1) {
196; CHECK-LABEL: @concat_bitreverse32_binary(
197; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A1:%.*]] to i64
198; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[A0:%.*]] to i64
199; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 32
200; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP3]], [[TMP1]]
201; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP4]])
202; CHECK-NEXT:    ret i64 [[TMP5]]
203;
204  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a0)
205  %2 = tail call i32 @llvm.bitreverse.i32(i32 %a1)
206  %3 = zext i32 %1 to i64
207  %4 = zext i32 %2 to i64
  ; bitreverse(%a1) goes to the upper half; bitreverse(%a0) stays low.
208  %5 = shl nuw i64 %4, 32
209  %6 = or i64 %3, %5
210  ret i64 %6
211}
212
; <2 x i32> -> <2 x i64> variant of the bitreverse "binary" case: per lane,
; bitreverse(%a0) lands in the lower 32 bits and bitreverse(%a1) in the
; upper 32 bits. The assertions expect the un-reversed arguments to be
; concatenated with their positions exchanged (%a0 shifted up by a splat of
; 32, %a1 low) and then a single llvm.bitreverse.v2i64 applied to the result.
213define <2 x i64> @concat_bitreverse32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) {
214; CHECK-LABEL: @concat_bitreverse32_binary_vector(
215; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
216; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
217; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
218; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]]
219; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP4]])
220; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
221;
222  %1 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a0)
223  %2 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a1)
224  %3 = zext <2 x i32> %1 to <2 x i64>
225  %4 = zext <2 x i32> %2 to <2 x i64>
  ; bitreverse(%a1) goes to the upper half; bitreverse(%a0) stays low.
226  %5 = shl nuw <2 x i64> %4, <i64 32, i64 32>
227  %6 = or <2 x i64> %3, %5
228  ret <2 x i64> %6
229}
230
; Declarations of the 32-bit-element bit-reverse intrinsics (scalar and
; <2 x i32>) that the bitreverse test inputs in this file call.
231declare i32 @llvm.bitreverse.i32(i32)
232declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
233