1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
4; Given pattern:
5;   (x shiftopcode Q) shiftopcode K
6; we should rewrite it as
7;   x shiftopcode (Q+K)  iff (Q+K) u< bitwidth(x)
8; This is valid for any shift, but they must be identical.
9; THIS FOLD DOES *NOT* REQUIRE ANY 'exact'/'nuw'/`nsw` FLAGS!
10
11; Basic scalar test
12
13define i32 @t0(i32 %x, i32 %y) {
14; CHECK-LABEL: @t0(
15; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X:%.*]], 30
16; CHECK-NEXT:    ret i32 [[T3]]
17;
18  %t0 = sub i32 32, %y
19  %t1 = lshr i32 %x, %t0
20  %t2 = add i32 %y, -2
21  %t3 = lshr exact i32 %t1, %t2 ; while there, test that we don't propagate partial 'exact' flag
22  ret i32 %t3
23}
24
25define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) {
26; CHECK-LABEL: @t1_vec_splat(
27; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
28; CHECK-NEXT:    ret <2 x i32> [[T3]]
29;
30  %t0 = sub <2 x i32> <i32 32, i32 32>, %y
31  %t1 = lshr <2 x i32> %x, %t0
32  %t2 = add <2 x i32> %y, <i32 -2, i32 -2>
33  %t3 = lshr <2 x i32> %t1, %t2
34  ret <2 x i32> %t3
35}
36
37define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
38; CHECK-LABEL: @t2_vec_nonsplat(
39; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
40; CHECK-NEXT:    ret <2 x i32> [[T3]]
41;
42  %t0 = sub <2 x i32> <i32 32, i32 30>, %y
43  %t1 = lshr <2 x i32> %x, %t0
44  %t2 = add <2 x i32> %y, <i32 -2, i32 0>
45  %t3 = lshr <2 x i32> %t1, %t2
46  ret <2 x i32> %t3
47}
48
49; Basic vector tests
50
51define <3 x i32> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i32> %y) {
52; CHECK-LABEL: @t3_vec_nonsplat_undef0(
53; CHECK-NEXT:    [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
54; CHECK-NEXT:    ret <3 x i32> [[T3]]
55;
56  %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y
57  %t1 = lshr <3 x i32> %x, %t0
58  %t2 = add <3 x i32> %y, <i32 -2, i32 -2, i32 -2>
59  %t3 = lshr <3 x i32> %t1, %t2
60  ret <3 x i32> %t3
61}
62
63define <3 x i32> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {
64; CHECK-LABEL: @t4_vec_nonsplat_undef1(
65; CHECK-NEXT:    [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
66; CHECK-NEXT:    ret <3 x i32> [[T3]]
67;
68  %t0 = sub <3 x i32> <i32 32, i32 32, i32 32>, %y
69  %t1 = lshr <3 x i32> %x, %t0
70  %t2 = add <3 x i32> %y, <i32 -2, i32 undef, i32 -2>
71  %t3 = lshr <3 x i32> %t1, %t2
72  ret <3 x i32> %t3
73}
74
75define <3 x i32> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {
76; CHECK-LABEL: @t5_vec_nonsplat_undef1(
77; CHECK-NEXT:    [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
78; CHECK-NEXT:    ret <3 x i32> [[T3]]
79;
80  %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y
81  %t1 = lshr <3 x i32> %x, %t0
82  %t2 = add <3 x i32> %y, <i32 -2, i32 undef, i32 -2>
83  %t3 = lshr <3 x i32> %t1, %t2
84  ret <3 x i32> %t3
85}
86
87; Some other shift opcodes
88define i32 @t6_shl(i32 %x, i32 %y) {
89; CHECK-LABEL: @t6_shl(
90; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[X:%.*]], 30
91; CHECK-NEXT:    ret i32 [[T3]]
92;
93  %t0 = sub i32 32, %y
94  %t1 = shl nuw i32 %x, %t0 ; while there, test that we don't propagate partial 'nuw' flag
95  %t2 = add i32 %y, -2
96  %t3 = shl nsw i32 %t1, %t2 ; while there, test that we don't propagate partial 'nsw' flag
97  ret i32 %t3
98}
99define i32 @t7_ashr(i32 %x, i32 %y) {
100; CHECK-LABEL: @t7_ashr(
101; CHECK-NEXT:    [[T3:%.*]] = ashr i32 [[X:%.*]], 30
102; CHECK-NEXT:    ret i32 [[T3]]
103;
104  %t0 = sub i32 32, %y
105  %t1 = ashr exact i32 %x, %t0 ; while there, test that we don't propagate partial 'exact' flag
106  %t2 = add i32 %y, -2
107  %t3 = ashr i32 %t1, %t2
108  ret i32 %t3
109}
110
111; If the same flag is present on both shifts, it can be kept.
112define i32 @t8_lshr_exact_flag_preservation(i32 %x, i32 %y) {
113; CHECK-LABEL: @t8_lshr_exact_flag_preservation(
114; CHECK-NEXT:    [[T3:%.*]] = lshr exact i32 [[X:%.*]], 30
115; CHECK-NEXT:    ret i32 [[T3]]
116;
117  %t0 = sub i32 32, %y
118  %t1 = lshr exact i32 %x, %t0
119  %t2 = add i32 %y, -2
120  %t3 = lshr exact i32 %t1, %t2
121  ret i32 %t3
122}
123define i32 @t9_ashr_exact_flag_preservation(i32 %x, i32 %y) {
124; CHECK-LABEL: @t9_ashr_exact_flag_preservation(
125; CHECK-NEXT:    [[T3:%.*]] = ashr exact i32 [[X:%.*]], 30
126; CHECK-NEXT:    ret i32 [[T3]]
127;
128  %t0 = sub i32 32, %y
129  %t1 = ashr exact i32 %x, %t0
130  %t2 = add i32 %y, -2
131  %t3 = ashr exact i32 %t1, %t2
132  ret i32 %t3
133}
134define i32 @t10_shl_nuw_flag_preservation(i32 %x, i32 %y) {
135; CHECK-LABEL: @t10_shl_nuw_flag_preservation(
136; CHECK-NEXT:    [[T3:%.*]] = shl nuw i32 [[X:%.*]], 30
137; CHECK-NEXT:    ret i32 [[T3]]
138;
139  %t0 = sub i32 32, %y
140  %t1 = shl nuw i32 %x, %t0
141  %t2 = add i32 %y, -2
142  %t3 = shl nuw nsw i32 %t1, %t2 ; only 'nuw' should be propagated.
143  ret i32 %t3
144}
145define i32 @t11_shl_nsw_flag_preservation(i32 %x, i32 %y) {
146; CHECK-LABEL: @t11_shl_nsw_flag_preservation(
147; CHECK-NEXT:    [[T3:%.*]] = shl nsw i32 [[X:%.*]], 30
148; CHECK-NEXT:    ret i32 [[T3]]
149;
150  %t0 = sub i32 32, %y
151  %t1 = shl nsw i32 %x, %t0
152  %t2 = add i32 %y, -2
153  %t3 = shl nsw nuw i32 %t1, %t2 ; only 'nuw' should be propagated.
154  ret i32 %t3
155}
156
157; Reduced from https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=15587
158@X = external global i32
159define i64 @constantexpr() {
160; CHECK-LABEL: @constantexpr(
161; CHECK-NEXT:    ret i64 0
162;
163  %A = alloca i64
164  %L = load i64, i64* %A
165  %V = add i64 ptrtoint (i32* @X to i64), 0
166  %B2 = shl i64 %V, 0
167  %B4 = ashr i64 %B2, %L
168  %B = and i64 undef, %B4
169  ret i64 %B
170}
171
172; No one-use tests since we will only produce a single instruction here.
173
174; Negative tests
175
176; Can't fold, total shift would be 32
177define i32 @n12(i32 %x, i32 %y) {
178; CHECK-LABEL: @n12(
179; CHECK-NEXT:    [[T0:%.*]] = sub i32 30, [[Y:%.*]]
180; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
181; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], 2
182; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[T1]], [[T2]]
183; CHECK-NEXT:    ret i32 [[T3]]
184;
185  %t0 = sub i32 30, %y
186  %t1 = lshr i32 %x, %t0
187  %t2 = add i32 %y, 2
188  %t3 = lshr i32 %t1, %t2
189  ret i32 %t3
190}
191; Can't fold, for second channel the shift would 32
192define <2 x i32> @t13_vec(<2 x i32> %x, <2 x i32> %y) {
193; CHECK-LABEL: @t13_vec(
194; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 30>, [[Y:%.*]]
195; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]]
196; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[Y]], <i32 -2, i32 2>
197; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[T1]], [[T2]]
198; CHECK-NEXT:    ret <2 x i32> [[T3]]
199;
200  %t0 = sub <2 x i32> <i32 32, i32 30>, %y
201  %t1 = lshr <2 x i32> %x, %t0
202  %t2 = add <2 x i32> %y, <i32 -2, i32 2>
203  %t3 = lshr <2 x i32> %t1, %t2
204  ret <2 x i32> %t3
205}
206
207; If we have different right-shifts, in general, we can't do anything with it.
208define i32 @n13(i32 %x, i32 %y) {
209; CHECK-LABEL: @n13(
210; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
211; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
212; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -2
213; CHECK-NEXT:    [[T3:%.*]] = ashr i32 [[T1]], [[T2]]
214; CHECK-NEXT:    ret i32 [[T3]]
215;
216  %t0 = sub i32 32, %y
217  %t1 = lshr i32 %x, %t0
218  %t2 = add i32 %y, -2
219  %t3 = ashr i32 %t1, %t2
220  ret i32 %t3
221}
222define i32 @n14(i32 %x, i32 %y) {
223; CHECK-LABEL: @n14(
224; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
225; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
226; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
227; CHECK-NEXT:    [[T3:%.*]] = ashr i32 [[T1]], [[T2]]
228; CHECK-NEXT:    ret i32 [[T3]]
229;
230  %t0 = sub i32 32, %y
231  %t1 = lshr i32 %x, %t0
232  %t2 = add i32 %y, -1
233  %t3 = ashr i32 %t1, %t2
234  ret i32 %t3
235}
236define i32 @n15(i32 %x, i32 %y) {
237; CHECK-LABEL: @n15(
238; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
239; CHECK-NEXT:    [[T1:%.*]] = ashr i32 [[X:%.*]], [[T0]]
240; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -2
241; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[T1]], [[T2]]
242; CHECK-NEXT:    ret i32 [[T3]]
243;
244  %t0 = sub i32 32, %y
245  %t1 = ashr i32 %x, %t0
246  %t2 = add i32 %y, -2
247  %t3 = lshr i32 %t1, %t2
248  ret i32 %t3
249}
250define i32 @n16(i32 %x, i32 %y) {
251; CHECK-LABEL: @n16(
252; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
253; CHECK-NEXT:    [[T1:%.*]] = ashr i32 [[X:%.*]], [[T0]]
254; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
255; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[T1]], [[T2]]
256; CHECK-NEXT:    ret i32 [[T3]]
257;
258  %t0 = sub i32 32, %y
259  %t1 = ashr i32 %x, %t0
260  %t2 = add i32 %y, -1
261  %t3 = lshr i32 %t1, %t2
262  ret i32 %t3
263}
264
265; If the shift direction is different, then this should be handled elsewhere.
266define i32 @n17(i32 %x, i32 %y) {
267; CHECK-LABEL: @n17(
268; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
269; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
270; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
271; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[T1]], [[T2]]
272; CHECK-NEXT:    ret i32 [[T3]]
273;
274  %t0 = sub i32 32, %y
275  %t1 = shl i32 %x, %t0
276  %t2 = add i32 %y, -1
277  %t3 = lshr i32 %t1, %t2
278  ret i32 %t3
279}
280define i32 @n18(i32 %x, i32 %y) {
281; CHECK-LABEL: @n18(
282; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
283; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
284; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
285; CHECK-NEXT:    [[T3:%.*]] = ashr i32 [[T1]], [[T2]]
286; CHECK-NEXT:    ret i32 [[T3]]
287;
288  %t0 = sub i32 32, %y
289  %t1 = shl i32 %x, %t0
290  %t2 = add i32 %y, -1
291  %t3 = ashr i32 %t1, %t2
292  ret i32 %t3
293}
294define i32 @n19(i32 %x, i32 %y) {
295; CHECK-LABEL: @n19(
296; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
297; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
298; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
299; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[T1]], [[T2]]
300; CHECK-NEXT:    ret i32 [[T3]]
301;
302  %t0 = sub i32 32, %y
303  %t1 = lshr i32 %x, %t0
304  %t2 = add i32 %y, -1
305  %t3 = shl i32 %t1, %t2
306  ret i32 %t3
307}
308define i32 @n20(i32 %x, i32 %y) {
309; CHECK-LABEL: @n20(
310; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[Y:%.*]]
311; CHECK-NEXT:    [[T1:%.*]] = ashr i32 [[X:%.*]], [[T0]]
312; CHECK-NEXT:    [[T2:%.*]] = add i32 [[Y]], -1
313; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[T1]], [[T2]]
314; CHECK-NEXT:    ret i32 [[T3]]
315;
316  %t0 = sub i32 32, %y
317  %t1 = ashr i32 %x, %t0
318  %t2 = add i32 %y, -1
319  %t3 = shl i32 %t1, %t2
320  ret i32 %t3
321}
322
323; See https://bugs.llvm.org/show_bug.cgi?id=44802
324define i3 @pr44802(i3 %t0) {
325; CHECK-LABEL: @pr44802(
326; CHECK-NEXT:    [[T1:%.*]] = sub i3 0, [[T0:%.*]]
327; CHECK-NEXT:    [[T2:%.*]] = icmp ne i3 [[T0]], 0
328; CHECK-NEXT:    [[T3:%.*]] = zext i1 [[T2]] to i3
329; CHECK-NEXT:    [[T4:%.*]] = lshr i3 [[T1]], [[T3]]
330; CHECK-NEXT:    [[T5:%.*]] = lshr i3 [[T4]], [[T3]]
331; CHECK-NEXT:    ret i3 [[T5]]
332;
333  %t1 = sub i3 0, %t0
334  %t2 = icmp ne i3 %t0, 0
335  %t3 = zext i1 %t2 to i3
336  %t4 = lshr i3 %t1, %t3
337  %t5 = lshr i3 %t4, %t3
338  ret i3 %t5
339}
340