1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
; Funnel-shift intrinsic declarations called by the tests in this file: scalar
; and 2-element-vector forms, with power-of-2 (i32) and non-power-of-2
; (i31, i33) element widths.
4declare i32 @llvm.fshl.i32(i32, i32, i32)
5declare i33 @llvm.fshr.i33(i33, i33, i33)
6declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
7declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
8
9; If the shift mask doesn't include any demanded bits, the funnel shift can be eliminated.
10
; The shift amount is masked with 32, so its low 5 bits are always zero and an
; i32 funnel shift left by it is a shift by 0. Expected output returns %x.
11define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
12; CHECK-LABEL: @fshl_mask_simplify1(
13; CHECK-NEXT:    ret i32 [[X:%.*]]
14;
15  %maskedsh = and i32 %sh, 32
16  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
17  ret i32 %r
18}
19
; Vector fshr variant: the splat mask 64 leaves the low 5 bits of each shift
; amount zero, so each lane shifts by 0. Expected output returns %y.
20define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
21; CHECK-LABEL: @fshr_mask_simplify2(
22; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
23;
24  %maskedsh = and <2 x i32> %sh, <i32 64, i32 64>
25  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh)
26  ret <2 x i32> %r
27}
28
29; Negative test.
30
; The mask 16 keeps bit 4 of the shift amount, which is inside the low 5 bits
; that an i32 funnel shift uses. Expected output keeps the 'and' and the call.
31define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) {
32; CHECK-LABEL: @fshl_mask_simplify3(
33; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 16
34; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
35; CHECK-NEXT:    ret i32 [[R]]
36;
37  %maskedsh = and i32 %sh, 16
38  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
39  ret i32 %r
40}
41
42; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
43
; i33 fshr with the shift amount masked by 64. With a bitwidth of 33 the amount
; is reduced modulo 33, so a zero low-bit pattern does not imply a zero shift.
; Expected output keeps both the mask and the call.
44define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
45; CHECK-LABEL: @fshr_mask_simplify1(
46; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 64
47; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
48; CHECK-NEXT:    ret i33 [[R]]
49;
50  %maskedsh = and i33 %sh, 64
51  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
52  ret i33 %r
53}
54
55; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
56
; <2 x i31> fshl with a splat mask of 32 on the shift amount. 32 modulo 31 is
; 1, so the masked amount can still shift. Expected output is unchanged.
57define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
58; CHECK-LABEL: @fshl_mask_simplify2(
59; CHECK-NEXT:    [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], <i31 32, i31 32>
60; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]])
61; CHECK-NEXT:    ret <2 x i31> [[R]]
62;
63  %maskedsh = and <2 x i31> %sh, <i31 32, i31 32>
64  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %maskedsh)
65  ret <2 x i31> %r
66}
67
68; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
69
; i33 fshr with the shift amount masked by 32. 32 is a valid nonzero shift for
; a 33-bit value, so the mask matters. Expected output is unchanged.
70define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
71; CHECK-LABEL: @fshr_mask_simplify3(
72; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 32
73; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
74; CHECK-NEXT:    ret i33 [[R]]
75;
76  %maskedsh = and i33 %sh, 32
77  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
78  ret i33 %r
79}
80
81; This mask op is unnecessary.
82
; The mask 31 keeps exactly the low 5 bits, which are the only shift-amount
; bits an i32 funnel shift uses. Expected output drops the 'and' and passes
; %sh straight to the intrinsic.
83define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
84; CHECK-LABEL: @fshl_mask_not_required(
85; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]])
86; CHECK-NEXT:    ret i32 [[R]]
87;
88  %maskedsh = and i32 %sh, 31
89  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
90  ret i32 %r
91}
92
93; This mask op can be reduced.
94
; The mask 33 has bits 0 and 5 set; bit 5 is outside the low 5 bits used by an
; i32 funnel shift. Expected output shrinks the mask constant to 1.
95define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) {
96; CHECK-LABEL: @fshl_mask_reduce_constant(
97; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1
98; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
99; CHECK-NEXT:    ret i32 [[R]]
100;
101  %maskedsh = and i32 %sh, 33
102  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
103  ret i32 %r
104}
105
106; But this mask op is required.
107
; The mask 15 clears bit 4 of the shift amount, which an i32 funnel shift does
; use, so the mask changes the result. Expected output is unchanged.
108define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) {
109; CHECK-LABEL: @fshl_mask_negative(
110; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 15
111; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
112; CHECK-NEXT:    ret i32 [[R]]
113;
114  %maskedsh = and i32 %sh, 15
115  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
116  ret i32 %r
117}
118
119; The transform is not limited to mask ops.
120
; The 'or' sets bit 5 of each shift amount, a bit that a 32-bit funnel shift
; does not use. Expected output drops the 'or' and uses %sh directly.
121define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
122; CHECK-LABEL: @fshr_set_but_not_demanded_vec(
123; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]])
124; CHECK-NEXT:    ret <2 x i32> [[R]]
125;
126  %bogusbits = or <2 x i32> %sh, <i32 32, i32 32>
127  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %bogusbits)
128  ret <2 x i32> %r
129}
130
131; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
132
; Same 'or 32' pattern on <2 x i31>. With a 31-bit element the amount is taken
; modulo 31, so setting bit 5 can change the shift. Expected output keeps the
; 'or' and the call.
133define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
134; CHECK-LABEL: @fshl_set_but_not_demanded_vec(
135; CHECK-NEXT:    [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], <i31 32, i31 32>
136; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]])
137; CHECK-NEXT:    ret <2 x i31> [[R]]
138;
139  %bogusbits = or <2 x i31> %sh, <i31 32, i31 32>
140  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %bogusbits)
141  ret <2 x i31> %r
142}
143
144; Simplify one undef or zero operand and constant shift amount.
145
; fshl with an undef first operand and constant shift 7 on i32. Expected output
; is a logical shift right of %x by 25 (32 - 7).
146define i32 @fshl_op0_undef(i32 %x) {
147; CHECK-LABEL: @fshl_op0_undef(
148; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
149; CHECK-NEXT:    ret i32 [[R]]
150;
151  %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7)
152  ret i32 %r
153}
154
; fshl with a zero first operand and constant shift 7 on i32. Expected output
; is a logical shift right of %x by 25 (32 - 7).
155define i32 @fshl_op0_zero(i32 %x) {
156; CHECK-LABEL: @fshl_op0_zero(
157; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
158; CHECK-NEXT:    ret i32 [[R]]
159;
160  %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7)
161  ret i32 %r
162}
163
; fshr with an undef first operand and constant shift 7 on i33. Expected output
; is a logical shift right of %x by 7.
164define i33 @fshr_op0_undef(i33 %x) {
165; CHECK-LABEL: @fshr_op0_undef(
166; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
167; CHECK-NEXT:    ret i33 [[R]]
168;
169  %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7)
170  ret i33 %r
171}
172
; fshr with a zero first operand and constant shift 7 on i33. Expected output
; is a logical shift right of %x by 7.
173define i33 @fshr_op0_zero(i33 %x) {
174; CHECK-LABEL: @fshr_op0_zero(
175; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
176; CHECK-NEXT:    ret i33 [[R]]
177;
178  %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7)
179  ret i33 %r
180}
181
; fshl with an undef second operand and constant shift 7 on i32. Expected
; output is a plain shift left of %x by 7.
182define i32 @fshl_op1_undef(i32 %x) {
183; CHECK-LABEL: @fshl_op1_undef(
184; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
185; CHECK-NEXT:    ret i32 [[R]]
186;
187  %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7)
188  ret i32 %r
189}
190
; fshl with a zero second operand and constant shift 7 on i32. Expected output
; is a plain shift left of %x by 7.
191define i32 @fshl_op1_zero(i32 %x) {
192; CHECK-LABEL: @fshl_op1_zero(
193; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
194; CHECK-NEXT:    ret i32 [[R]]
195;
196  %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7)
197  ret i32 %r
198}
199
; fshr with an undef second operand and constant shift 7 on i33. Expected
; output is a shift left of %x by 26 (33 - 7).
200define i33 @fshr_op1_undef(i33 %x) {
201; CHECK-LABEL: @fshr_op1_undef(
202; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
203; CHECK-NEXT:    ret i33 [[R]]
204;
205  %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7)
206  ret i33 %r
207}
208
; fshr with a zero second operand and constant shift 7 on i33. Expected output
; is a shift left of %x by 26 (33 - 7).
209define i33 @fshr_op1_zero(i33 %x) {
210; CHECK-LABEL: @fshr_op1_zero(
211; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
212; CHECK-NEXT:    ret i33 [[R]]
213;
214  %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7)
215  ret i33 %r
216}
217
; <2 x i31> fshl with a zero first operand and splat shift 7. Expected output
; is a logical shift right of %x by 24 (31 - 7) in each lane.
218define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) {
219; CHECK-LABEL: @fshl_op0_zero_splat_vec(
220; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 24, i31 24>
221; CHECK-NEXT:    ret <2 x i31> [[R]]
222;
223  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>)
224  ret <2 x i31> %r
225}
226
; <2 x i31> fshl with an undef second operand and splat shift 7. Expected
; output is a shift left of %x by 7 in each lane.
227define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) {
228; CHECK-LABEL: @fshl_op1_undef_splat_vec(
229; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 7, i31 7>
230; CHECK-NEXT:    ret <2 x i31> [[R]]
231;
232  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>)
233  ret <2 x i31> %r
234}
235
; <2 x i32> fshr with an undef first operand and splat shift 7. Expected output
; is a logical shift right of %x by 7 in each lane.
236define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
237; CHECK-LABEL: @fshr_op0_undef_splat_vec(
238; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7>
239; CHECK-NEXT:    ret <2 x i32> [[R]]
240;
241  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
242  ret <2 x i32> %r
243}
244
; <2 x i32> fshr with a zero second operand and splat shift 7. Expected output
; is a shift left of %x by 25 (32 - 7) in each lane.
245define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) {
246; CHECK-LABEL: @fshr_op1_zero_splat_vec(
247; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25>
248; CHECK-NEXT:    ret <2 x i32> [[R]]
249;
250  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
251  ret <2 x i32> %r
252}
253
; Non-splat, out-of-range shift amounts <-1, 33> on <2 x i31> with a zero first
; operand. Modulo 31 these amounts are <1, 2>, so the expected output is a
; logical shift right of %x by <30, 29> (31 - 1, 31 - 2).
254define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) {
255; CHECK-LABEL: @fshl_op0_zero_vec(
256; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 30, i31 29>
257; CHECK-NEXT:    ret <2 x i31> [[R]]
258;
259  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 -1, i31 33>)
260  ret <2 x i31> %r
261}
262
; Non-splat, out-of-range shift amounts <-1, 33> on <2 x i31> with an undef
; second operand. Modulo 31 these amounts are <1, 2>, so the expected output is
; a shift left of %x by <1, 2>.
263define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) {
264; CHECK-LABEL: @fshl_op1_undef_vec(
265; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 1, i31 2>
266; CHECK-NEXT:    ret <2 x i31> [[R]]
267;
268  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 -1, i31 33>)
269  ret <2 x i31> %r
270}
271
; Non-splat, out-of-range shift amounts <-1, 33> on <2 x i32> with an undef
; first operand. Modulo 32 these amounts are <31, 1>, so the expected output is
; a logical shift right of %x by <31, 1>.
272define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
273; CHECK-LABEL: @fshr_op0_undef_vec(
274; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 1>
275; CHECK-NEXT:    ret <2 x i32> [[R]]
276;
277  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 -1, i32 33>)
278  ret <2 x i32> %r
279}
280
; Non-splat, out-of-range shift amounts <-1, 33> on <2 x i32> with a zero
; second operand. Modulo 32 these amounts are <31, 1>, so the expected output
; is a shift left of %x by <1, 31> (32 - 31, 32 - 1).
281define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
282; CHECK-LABEL: @fshr_op1_zero_vec(
283; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 31>
284; CHECK-NEXT:    ret <2 x i32> [[R]]
285;
286  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 33>)
287  ret <2 x i32> %r
288}
289
290; Only demand bits from one of the operands.
291
; Only bit 7 of the funnel-shift result survives the 'and 128'. With a left
; shift of 7 that bit comes from bit 0 of %x, so the expected output replaces
; the intrinsic with a plain shl of %x and drops %y.
292define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
293; CHECK-LABEL: @fshl_only_op0_demanded(
294; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X:%.*]], 7
295; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 128
296; CHECK-NEXT:    ret i32 [[R]]
297;
298  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
299  %r = and i32 %z, 128
300  ret i32 %r
301}
302
; Only bits 0-5 of the result survive the 'and 63'. With a left shift of 7 the
; low 7 result bits come from the top of %y, so the expected output replaces
; the intrinsic with lshr of %y by 25 and drops %x.
303define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
304; CHECK-LABEL: @fshl_only_op1_demanded(
305; CHECK-NEXT:    [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
306; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 63
307; CHECK-NEXT:    ret i32 [[R]]
308;
309  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
310  %r = and i32 %z, 63
311  ret i32 %r
312}
313
; i33 fshr by 7 followed by a mask (12392) whose set bits all lie below bit 26.
; Those result bits come only from %y, so the expected output replaces the
; intrinsic with lshr of %y by 7 and drops %x.
314define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
315; CHECK-LABEL: @fshr_only_op1_demanded(
316; CHECK-NEXT:    [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
317; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 12392
318; CHECK-NEXT:    ret i33 [[R]]
319;
320  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
321  %r = and i33 %z, 12392
322  ret i33 %r
323}
324
; i33 fshr by 7 followed by lshr 30, which keeps only result bits 30-32. Those
; bits come from %x (bits 4-6), so the expected output is lshr of %x by 4
; masked with 7, and %y is dropped.
325define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
326; CHECK-LABEL: @fshr_only_op0_demanded(
327; CHECK-NEXT:    [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
328; CHECK-NEXT:    [[R:%.*]] = and i33 [[TMP1]], 7
329; CHECK-NEXT:    ret i33 [[R]]
330;
331  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
332  %r = lshr i33 %z, 30
333  ret i33 %r
334}
335
; <2 x i31> fshl by splat 7 followed by masks <63, 31>, which keep only low
; result bits that come from %y. Expected output is lshr of %y by 24 (31 - 7)
; with the same mask; %x is dropped.
336define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
337; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
338; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
339; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
340; CHECK-NEXT:    ret <2 x i31> [[R]]
341;
342  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
343  %r = and <2 x i31> %z, <i31 63, i31 31>
344  ret <2 x i31> %r
345}
346
; A constant shift amount of 33 on i32 is expected to be reduced modulo the
; bitwidth to 1.
347define i32 @fshl_constant_shift_amount_modulo_bitwidth(i32 %x, i32 %y) {
348; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth(
349; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 1)
350; CHECK-NEXT:    ret i32 [[R]]
351;
352  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 33)
353  ret i32 %r
354}
355
; fshr by 34 on i33: 34 modulo 33 is 1, and a funnel shift right by 1 equals a
; funnel shift left by 32 (33 - 1). Expected output is the fshl form.
356define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
357; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
358; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 32)
359; CHECK-NEXT:    ret i33 [[R]]
360;
361  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
362  ret i33 %r
363}
364
; fshl with an undef shift amount. Expected output returns the first operand,
; %x, which is what a shift by zero would produce.
365define i32 @fshl_undef_shift_amount(i32 %x, i32 %y) {
366; CHECK-LABEL: @fshl_undef_shift_amount(
367; CHECK-NEXT:    ret i32 [[X:%.*]]
368;
369  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 undef)
370  ret i32 %r
371}
372
; fshr with an undef shift amount. Expected output returns the second operand,
; %y, which is what a shift by zero would produce.
373define i33 @fshr_undef_shift_amount(i33 %x, i33 %y) {
374; CHECK-LABEL: @fshr_undef_shift_amount(
375; CHECK-NEXT:    ret i33 [[Y:%.*]]
376;
377  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 undef)
378  ret i33 %r
379}
380
; External i8 global. Tests in this file take its address with ptrtoint to
; build a shift amount that is a constant expression, not an integer literal.
381@external_global = external global i8
382
; The shift amount is a ptrtoint constant expression whose value is not known.
; Expected output only folds the expression into the call operand; the
; intrinsic stays fshr and no modulo reduction is applied.
383define i33 @fshr_constant_shift_amount_modulo_bitwidth_constexpr(i33 %x, i33 %y) {
384; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_constexpr(
385; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 ptrtoint (i8* @external_global to i33))
386; CHECK-NEXT:    ret i33 [[R]]
387;
388  %shamt = ptrtoint i8* @external_global to i33
389  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %shamt)
390  ret i33 %r
391}
392
; <2 x i32> fshr by <34, -1>: modulo 32 the amounts are <2, 31>. Expected
; output is the equivalent fshl by <30, 1> (32 - 2, 32 - 31).
393define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
394; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
395; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 30, i32 1>)
396; CHECK-NEXT:    ret <2 x i32> [[R]]
397;
398  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
399  ret <2 x i32> %r
400}
401
; <2 x i31> fshl by <34, -1>: 34 modulo 31 is 3, and -1 as an unsigned i31
; (2^31 - 1) modulo 31 is 1. Expected output uses shift amounts <3, 1>.
402define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec(<2 x i31> %x, <2 x i31> %y) {
403; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec(
404; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 3, i31 1>)
405; CHECK-NEXT:    ret <2 x i31> [[R]]
406;
407  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 -1>)
408  ret <2 x i31> %r
409}
410
; One lane of the shift-amount vector is a ptrtoint constant expression.
; Expected output leaves the whole vector untouched, including the literal 34
; in lane 0, which is not reduced modulo 31.
; NOTE(review): %shamt is never used; the call spells the constant expression
; inline. It is absent from the expected output, presumably removed as dead.
411define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(<2 x i31> %x, <2 x i31> %y) {
412; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(
413; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
414; CHECK-NEXT:    ret <2 x i31> [[R]]
415;
416  %shamt = ptrtoint i8* @external_global to i31
417  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
418  ret <2 x i31> %r
419}
420
; Vector fshl with an undef shift-amount vector. Expected output returns the
; first operand, %x.
421define <2 x i31> @fshl_undef_shift_amount_vec(<2 x i31> %x, <2 x i31> %y) {
422; CHECK-LABEL: @fshl_undef_shift_amount_vec(
423; CHECK-NEXT:    ret <2 x i31> [[X:%.*]]
424;
425  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> undef)
426  ret <2 x i31> %r
427}
428
; Vector fshr with an undef shift-amount vector. Expected output returns the
; second operand, %y.
429define <2 x i32> @fshr_undef_shift_amount_vec(<2 x i32> %x, <2 x i32> %y) {
430; CHECK-LABEL: @fshr_undef_shift_amount_vec(
431; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
432;
433  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> undef)
434  ret <2 x i32> %r
435}
436
437; TODO: Don't let SimplifyDemandedBits split up a rotate - keep the same operand.
438
; Rotate left by 8 written as fshl with both operands equal to %x = %a0 ^ 2.
; Only the top 8 bits of the second operand reach the result, and the xor
; touches bit 1 only. The expected output therefore feeds %a0 as the second
; operand, so the call no longer has identical operands.
439define i32 @rotl_common_demanded(i32 %a0) {
440; CHECK-LABEL: @rotl_common_demanded(
441; CHECK-NEXT:    [[X:%.*]] = xor i32 [[A0:%.*]], 2
442; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[A0]], i32 8)
443; CHECK-NEXT:    ret i32 [[R]]
444;
445  %x = xor i32 %a0, 2
446  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
447  ret i32 %r
448}
449
; Rotate right by 8 on i33 with both operands equal to %x = %a0 ^ 2. Expected
; output is the equivalent fshl by 25 (33 - 8). The second operand becomes
; %a0 because bit 1, the only bit the xor changes, is shifted out of it.
450define i33 @rotr_common_demanded(i33 %a0) {
451; CHECK-LABEL: @rotr_common_demanded(
452; CHECK-NEXT:    [[X:%.*]] = xor i33 [[A0:%.*]], 2
453; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[A0]], i33 25)
454; CHECK-NEXT:    ret i33 [[R]]
455;
456  %x = xor i33 %a0, 2
457  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 8)
458  ret i33 %r
459}
460
461; The shift modulo bitwidth is the same for all vector elements.
462
; The shift amounts <7, 38> differ as literals but are both 7 modulo 31, so
; the result matches the splat-7 case. Expected output is lshr of %y by 24
; with the original mask; %x is dropped.
463define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
464; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
465; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
466; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
467; CHECK-NEXT:    ret <2 x i31> [[R]]
468;
469  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
470  %r = and <2 x i31> %z, <i31 63, i31 31>
471  ret <2 x i31> %r
472}
473
; Rotate left (fshl with identical operands) by 33 on i32. Expected output
; reduces the amount modulo 32 to 1.
474define i32 @rotl_constant_shift_amount(i32 %x) {
475; CHECK-LABEL: @rotl_constant_shift_amount(
476; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 1)
477; CHECK-NEXT:    ret i32 [[R]]
478;
479  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 33)
480  ret i32 %r
481}
482
; Vector rotate left on <2 x i31> by <32, -1>. Both amounts are 1 modulo 31,
; so the expected output uses the splat <1, 1>.
483define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) {
484; CHECK-LABEL: @rotl_constant_shift_amount_vec(
485; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> <i31 1, i31 1>)
486; CHECK-NEXT:    ret <2 x i31> [[R]]
487;
488  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> <i31 32, i31 -1>)
489  ret <2 x i31> %r
490}
491
; Rotate right by 34 on i33: 34 modulo 33 is 1, and a rotate right by 1 equals
; a rotate left by 32. Expected output is the fshl form.
492define i33 @rotr_constant_shift_amount(i33 %x) {
493; CHECK-LABEL: @rotr_constant_shift_amount(
494; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[X]], i33 32)
495; CHECK-NEXT:    ret i33 [[R]]
496;
497  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 34)
498  ret i33 %r
499}
500
; Vector rotate right on <2 x i32> by <33, -1>: modulo 32 the amounts are
; <1, 31>. Expected output is the equivalent rotate left by <31, 1>.
501define <2 x i32> @rotr_constant_shift_amount_vec(<2 x i32> %x) {
502; CHECK-LABEL: @rotr_constant_shift_amount_vec(
503; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 31, i32 1>)
504; CHECK-NEXT:    ret <2 x i32> [[R]]
505;
506  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 33, i32 -1>)
507  ret <2 x i32> %r
508}
509
510; Demand bits from both operands -- cannot simplify.
511
; The mask 192 keeps result bits 6 and 7. With a left shift of 7, bit 6 comes
; from %y and bit 7 from %x, so both operands are needed. Expected output is
; unchanged.
512define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
513; CHECK-LABEL: @fshl_both_ops_demanded(
514; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
515; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 192
516; CHECK-NEXT:    ret i32 [[R]]
517;
518  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
519  %r = and i32 %z, 192
520  ret i32 %r
521}
522
; i33 fshr by 26 is the same as fshl by 7 (33 - 26). The mask 192 keeps bits 6
; and 7, which straddle both operands. Expected output keeps the funnel shift,
; rewritten as fshl by 7, together with the mask.
523define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
524; CHECK-LABEL: @fshr_both_ops_demanded(
525; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
526; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 192
527; CHECK-NEXT:    ret i33 [[R]]
528;
529  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 26)
530  %r = and i33 %z, 192
531  ret i33 %r
532}
533
534; Both operands are demanded, but there are known bits.
535
; Both demanded result bits are known constants. Bit 7 is bit 0 of %x2, which
; the 'or' forces to 1. Bit 6 is bit 31 of %y2, which the lshr forces to 0.
; Expected output is the constant 128.
536define i32 @fshl_known_bits(i32 %x, i32 %y) {
537; CHECK-LABEL: @fshl_known_bits(
538; CHECK-NEXT:    ret i32 128
539;
540  %x2 = or i32 %x, 1   ; lo bit set
541  %y2 = lshr i32 %y, 1 ; hi bit clear
542  %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 7)
543  %r = and i32 %z, 192
544  ret i32 %r
545}
546
; i33 version of the known-bits test, using fshr by 26 (same as fshl by 7).
; Bit 7 of the result is bit 0 of %x2, which is forced to 1. Bit 6 is bit 32
; of %y2, which the lshr forces to 0. Expected output is the constant 128.
547define i33 @fshr_known_bits(i33 %x, i33 %y) {
548; CHECK-LABEL: @fshr_known_bits(
549; CHECK-NEXT:    ret i33 128
550;
551  %x2 = or i33 %x, 1 ; lo bit set
552  %y2 = lshr i33 %y, 1 ; hi bit clear
553  %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 26)
554  %r = and i33 %z, 192
555  ret i33 %r
556}
557
558; This case fails to simplify due to multiple uses.
559
; Rotate right by 1 on i33 whose result %b feeds both the lshr and the xor.
; Expected output only rewrites the rotate as fshl by 32 (33 - 1) and leaves
; the remaining instructions in place.
560define i33 @fshr_multi_use(i33 %a) {
561; CHECK-LABEL: @fshr_multi_use(
562; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
563; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
564; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
565; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
566; CHECK-NEXT:    ret i33 [[E]]
567;
568  %b = tail call i33 @llvm.fshr.i33(i33 %a, i33 %a, i33 1)
569  %c = lshr i33 %b, 23
570  %d = xor i33 %c, %b
571  %e = and i33 %d, 31
572  ret i33 %e
573}
574
575; This demonstrates the same simplification working if the fshr intrinsic
576; is expanded into shifts and or.
577
; Same computation as the intrinsic multi-use test, with the rotate spelled as
; lshr 1, shl 32 and or. Expected output recognises the pattern as fshl of %a
; with itself by 32 and keeps the rest unchanged.
578define i33 @expanded_fshr_multi_use(i33 %a) {
579; CHECK-LABEL: @expanded_fshr_multi_use(
580; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
581; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
582; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
583; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
584; CHECK-NEXT:    ret i33 [[E]]
585;
586  %t = lshr i33 %a, 1
587  %t2 = shl i33 %a, 32
588  %b = or i33 %t, %t2
589  %c = lshr i33 %b, 23
590  %d = xor i33 %c, %b
591  %e = and i33 %d, 31
592  ret i33 %e
593}
594
; 16-bit funnel-shift declarations (scalar fshl/fshr and <3 x i16> fshl) used
; by the bswap tests that follow.
595declare i16 @llvm.fshl.i16(i16, i16, i16)
596declare i16 @llvm.fshr.i16(i16, i16, i16)
597declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
598
599; Special case: rotating a 16-bit value left or right by 8 bits is a bswap.
600
; i16 rotate left by 8 (fshl with identical operands) swaps the two bytes.
; Expected output is a call to llvm.bswap.i16.
601define i16 @fshl_bswap(i16 %x) {
602; CHECK-LABEL: @fshl_bswap(
603; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
604; CHECK-NEXT:    ret i16 [[R]]
605;
606  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
607  ret i16 %r
608}
609
; i16 rotate right by 8 (fshr with identical operands) also swaps the two
; bytes. Expected output is a call to llvm.bswap.i16.
610define i16 @fshr_bswap(i16 %x) {
611; CHECK-LABEL: @fshr_bswap(
612; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
613; CHECK-NEXT:    ret i16 [[R]]
614;
615  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
616  ret i16 %r
617}
618
; <3 x i16> rotate left by splat 8. Expected output is the vector bswap
; intrinsic llvm.bswap.v3i16.
619define <3 x i16> @fshl_bswap_vector(<3 x i16> %x) {
620; CHECK-LABEL: @fshl_bswap_vector(
621; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> [[X:%.*]])
622; CHECK-NEXT:    ret <3 x i16> [[R]]
623;
624  %r = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %x, <3 x i16> %x, <3 x i16> <i16 8, i16 8, i16 8>)
625  ret <3 x i16> %r
626}
627
628; Negative test
629
; The two data operands differ (%x and %y), so this is a funnel shift, not a
; rotate. Expected output keeps the fshl call.
630define i16 @fshl_bswap_wrong_op(i16 %x, i16 %y) {
631; CHECK-LABEL: @fshl_bswap_wrong_op(
632; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i16 8)
633; CHECK-NEXT:    ret i16 [[R]]
634;
635  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 8)
636  ret i16 %r
637}
638
639; Negative test
640
; i16 rotate right by 4, not 8, so no byte swap. Expected output only rewrites
; the rotate as fshl by 12 (16 - 4).
641define i16 @fshr_bswap_wrong_amount(i16 %x) {
642; CHECK-LABEL: @fshr_bswap_wrong_amount(
643; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 12)
644; CHECK-NEXT:    ret i16 [[R]]
645;
646  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 4)
647  ret i16 %r
648}
649
650; Negative test
651
; Rotate left by 8 on i32: an 8-bit rotate of a 4-byte value is not a byte
; swap. Expected output keeps the fshl call unchanged.
652define i32 @fshl_bswap_wrong_width(i32 %x) {
653; CHECK-LABEL: @fshl_bswap_wrong_width(
654; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
655; CHECK-NEXT:    ret i32 [[R]]
656;
657  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
658  ret i32 %r
659}
660
; Rotate left by 16 of a value whose low 16 bits are masked off. Only the high
; half survives and lands in the low half, so the expected output is a single
; lshr of %a by 16.
661define i32 @fshl_mask_args_same1(i32 %a) {
662; CHECK-LABEL: @fshl_mask_args_same1(
663; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
664; CHECK-NEXT:    ret i32 [[T2]]
665;
666  %t1 = and i32 %a, 4294901760 ; 0xffff0000
667  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 16)
668  ret i32 %t2
669}
670
; Rotate left by 8 of a value limited to its low byte. Nothing wraps around,
; so the expected output is shl by 8 followed by a mask of 65280 (0xff00).
671define i32 @fshl_mask_args_same2(i32 %a) {
672; CHECK-LABEL: @fshl_mask_args_same2(
673; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[A:%.*]], 8
674; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], 65280
675; CHECK-NEXT:    ret i32 [[T2]]
676;
677  %t1 = and i32 %a, 255
678  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 8)
679  ret i32 %t2
680}
681
; Rotate left by 24 of a value limited to its low byte. The byte moves to the
; top and the shift itself discards every other bit of %a, so the expected
; output is a single shl by 24 with no mask.
682define i32 @fshl_mask_args_same3(i32 %a) {
683; CHECK-LABEL: @fshl_mask_args_same3(
684; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[A:%.*]], 24
685; CHECK-NEXT:    ret i32 [[T2]]
686;
687  %t1 = and i32 %a, 255
688  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 24)
689  ret i32 %t2
690}
691
; fshl by 17 of two differently masked copies of %a. The first operand has its
; low 16 bits cleared, so shifting it left by 17 leaves nothing. The result is
; the second operand shifted right by 15: expected output is lshr of %a by 15
; masked with 130560 (0xff000000 >> 15).
692define i32 @fshl_mask_args_different(i32 %a) {
693; CHECK-LABEL: @fshl_mask_args_different(
694; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[A:%.*]], 15
695; CHECK-NEXT:    [[T3:%.*]] = and i32 [[T1]], 130560
696; CHECK-NEXT:    ret i32 [[T3]]
697;
698  %t2 = and i32 %a, 4294901760 ; 0xffff0000
699  %t1 = and i32 %a, 4278190080 ; 0xff000000
700  %t3 = call i32 @llvm.fshl.i32(i32 %t2, i32 %t1, i32 17)
701  ret i32 %t3
702}
703
; <2 x i31> funnel shift left by 10 of two masked copies of %a. The second
; operand is below 1024, so shifting it right by 21 (31 - 10) gives zero, and
; the expected output is a plain shl of %a by 10.
; NOTE(review): despite the fshr_ prefix in the name, the body calls fshl.
; NOTE(review): 6442450943 needs 33 bits and does not fit in i31; presumably
; the IR parser truncates it - TODO confirm, newer parsers may reject it.
704define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
705; CHECK-LABEL: @fshr_mask_args_same_vector(
706; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i31> [[A:%.*]], <i31 10, i31 10>
707; CHECK-NEXT:    ret <2 x i31> [[T3]]
708;
709  %t1 = and <2 x i31> %a, <i31 1000, i31 1000>
710  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
711  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 10>)
712  ret <2 x i31> %t3
713}
714
; Rotate right by 3 of %t1 = %a & <1000000, 100000>. Both mask constants have
; their low 3 bits clear, so nothing wraps around and the expected output is
; 'lshr exact' of %t1 by 3.
; NOTE(review): %t2 and the parameter %b are never used by the call; %t2 is
; absent from the expected output. Its constant 6442450943 does not fit in
; i32 - presumably truncated by the IR parser, TODO confirm.
715define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
716; CHECK-LABEL: @fshr_mask_args_same_vector2(
717; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
718; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], <i32 3, i32 3>
719; CHECK-NEXT:    ret <2 x i32> [[T3]]
720;
721  %t1 = and <2 x i32> %a, <i32 1000000, i32 100000>
722  %t2 = and <2 x i32> %a, <i32 6442450943, i32 6442450943>
723  %t3 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %t1, <2 x i32> %t1, <2 x i32> <i32 3, i32 3>)
724  ret <2 x i32> %t3
725}
726
; <2 x i31> funnel shift left by the non-splat amounts <10, 3>. Expected output
; keeps the call and the %t1 mask but passes %a directly as the first operand,
; dropping the %t2 mask.
; NOTE(review): despite the fshr_ prefix in the name, the body calls fshl.
; NOTE(review): 6442450943 needs 33 bits and does not fit in i31; presumably
; the IR parser truncates it - TODO confirm, newer parsers may reject it.
727define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) {
728; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable(
729; CHECK-NEXT:    [[T1:%.*]] = and <2 x i31> [[A:%.*]], <i31 1000, i31 1000>
730; CHECK-NEXT:    [[T3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[T1]], <2 x i31> <i31 10, i31 3>)
731; CHECK-NEXT:    ret <2 x i31> [[T3]]
732;
733  %t1 = and <2 x i31> %a, <i31 1000, i31 1000>
734  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
735  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 3>)
736  ret <2 x i31> %t3
737}
738