; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; If we have some pattern that leaves only some low bits set, and then performs
; left-shift of those bits, we can combine those two shifts into a shift+mask.

; There are many variants to this pattern:
;   e)  (trunc (((x << maskNbits) a>> maskNbits))) << shiftNbits
; simplify to:
;   (trunc(x)) << shiftNbits
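
; As an illustrative hand-worked example (not part of the autogenerated checks),
; take the scalar pattern below with %nbits = 48:
;   (%x << 48) a>> 48 keeps the low 16 bits of %x and sign-extends bit 15,
;   trunc to i32 keeps the low 32 bits of that, and the final shl by
;   (%nbits - 32) = 16 pushes all of the sign-extension copies back out of the
;   i32. Only the low 16 bits of %x are left, which is exactly what
;   (trunc %x) << 16 computes, so the masking shift pair can be dropped.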

; Simple tests.

declare void @use32(i32)
declare void @use64(i64)

define i32 @t0_basic(i64 %x, i32 %nbits) {
; CHECK-LABEL: @t0_basic(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add i32 [[NBITS]], -32
; CHECK-NEXT:    [[T3:%.*]] = ashr i64 [[T1]], [[T0]]
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use32(i32 [[T2]])
; CHECK-NEXT:    call void @use64(i64 [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
; CHECK-NEXT:    ret i32 [[T5]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 %x, %t0
  %t2 = add i32 %nbits, -32
  %t3 = ashr i64 %t1, %t0

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use32(i32 %t2)
  call void @use64(i64 %t3)

  %t4 = trunc i64 %t3 to i32
  %t5 = shl i32 %t4, %t2
  ret i32 %t5
}

; Vectors

declare void @use8xi32(<8 x i32>)
declare void @use8xi64(<8 x i64>)

define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t1_vec_splat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
; CHECK-NEXT:    [[T3:%.*]] = ashr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT:    ret <8 x i32> [[T5]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> %x, %t0
  %t2 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
  %t3 = ashr <8 x i64> %t1, %t0

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi32(<8 x i32> %t2)
  call void @use8xi64(<8 x i64> %t3)

  %t4 = trunc <8 x i64> %t3 to <8 x i32>
  %t5 = shl <8 x i32> %t4, %t2
  ret <8 x i32> %t5
}

define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t2_vec_splat_undef(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32>
; CHECK-NEXT:    [[T3:%.*]] = ashr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT:    ret <8 x i32> [[T5]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> %x, %t0
  %t2 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32>
  %t3 = ashr <8 x i64> %t1, %t0

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi32(<8 x i32> %t2)
  call void @use8xi64(<8 x i64> %t3)

  %t4 = trunc <8 x i64> %t3 to <8 x i32>
  %t5 = shl <8 x i32> %t4, %t2
  ret <8 x i32> %t5
}

define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t3_vec_nonsplat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64>
; CHECK-NEXT:    [[T3:%.*]] = ashr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT:    ret <8 x i32> [[T5]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> %x, %t0
  %t2 = add <8 x i32> %nbits, <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64>
  %t3 = ashr <8 x i64> %t1, %t0

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi32(<8 x i32> %t2)
  call void @use8xi64(<8 x i64> %t3)

  %t4 = trunc <8 x i64> %t3 to <8 x i32>
  %t5 = shl <8 x i32> %t4, %t2
  ret <8 x i32> %t5
}

; Extra uses.
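; Here the trunc itself has an extra use, and (as the checks below show) the
; final shift stays on the original %t4 instead of being rebuilt on a fresh
; trunc of %x.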

define i32 @n4_extrause(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n4_extrause(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add i32 [[NBITS]], -32
; CHECK-NEXT:    [[T3:%.*]] = ashr i64 [[T1]], [[T0]]
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use32(i32 [[T2]])
; CHECK-NEXT:    call void @use64(i64 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
; CHECK-NEXT:    call void @use32(i32 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
; CHECK-NEXT:    ret i32 [[T5]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 %x, %t0
  %t2 = add i32 %nbits, -32
  %t3 = ashr i64 %t1, %t0

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use32(i32 %t2)
  call void @use64(i64 %t3)

  %t4 = trunc i64 %t3 to i32
  call void @use32(i32 %t4)
  %t5 = shl i32 %t4, %t2
  ret i32 %t5
}

; If the mask is needed, we can't fold.
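; With an offset of -33 instead of -32, the final shift amount is one short of
; what is needed to push all of the ashr's sign-extension bits back out of the
; i32, so one of those bits stays visible and a mask would be required.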

define i32 @n5_mask(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n5_mask(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 [[X:%.*]], [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use32(i32 [[T2]])
; CHECK-NEXT:    [[T3:%.*]] = ashr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
; CHECK-NEXT:    ret i32 [[T5]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 %x, %t0
  %t2 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use32(i32 %t2)

  %t3 = ashr i64 %t1, %t0
  %t4 = trunc i64 %t3 to i32
  %t5 = shl i32 %t4, %t2
  ret i32 %t5
}
