; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; If we have some pattern that leaves only some low bits set, and then performs
; left-shift of those bits, we can combine those two shifts into a shift+mask.

; There are many variants to this pattern:
;   d)  (trunc ((x & ((-1 << maskNbits) >> maskNbits)))) << shiftNbits
; simplify to:
;   ((trunc(x)) << shiftNbits) & (-1 >> ((-(maskNbits+shiftNbits))+32))
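; As an informal sanity check on the constants in the CHECK lines below (this
; note is not part of the autogenerated assertions): in the i64 -> i32 tests,
; the mask (-1 << nbits) >> nbits keeps the low (64 - nbits) bits of %x and the
; final shift amount is nbits - 33, so, for nbits values where all shifts are
; well defined, the highest bit position that can end up set is
; (64 - nbits - 1) + (nbits - 33) = 30. A single i32 mask of 2147483647
; (-1 u>> 1, clearing only bit 31) is therefore enough after the fold.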

; Simple tests.

declare void @use32(i32)
declare void @use64(i64)

define i32 @t0_basic(i64 %x, i32 %nbits) {
; CHECK-LABEL: @t0_basic(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  %t5 = trunc i64 %t4 to i32
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

; Vectors

declare void @use8xi32(<8 x i32>)
declare void @use8xi64(<8 x i64>)

define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t1_vec_splat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t2_vec_splat_undef(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t3_vec_nonsplat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

; Extra uses.
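; In the following tests the intermediate 'and' and/or 'trunc' values have
; extra uses, so the fold is not expected to fire; the 'n' prefix marks them as
; negative tests. (Informal note, not part of the autogenerated assertions.)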

define i32 @n4_extrause0(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n4_extrause0(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    call void @use64(i64 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  call void @use64(i64 %t4)
  %t5 = trunc i64 %t4 to i32
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

define i32 @n5_extrause1(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n5_extrause1(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    call void @use32(i32 [[T5]])
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  %t5 = trunc i64 %t4 to i32
  call void @use32(i32 %t5)
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

define i32 @n6_extrause2(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n6_extrause2(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    call void @use64(i64 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    call void @use32(i32 [[T5]])
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  call void @use64(i64 %t4)
  %t5 = trunc i64 %t4 to i32
  call void @use32(i32 %t5)
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}