; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target; all reductions should be expanded.
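; With no target to provide a custom lowering, the pass rewrites each reduction
; into a log2 tree of shufflevector + binary-op steps followed by an
; extractelement of lane 0 (see the CHECK lines below).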
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)

declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)

declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)

declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)

define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

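; Without fast-math flags the FP reduction must preserve the source evaluation
; order, so it is expanded into a sequential chain of scalar fadds starting from
; the start value instead of a shuffle tree.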
define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FIXME: Expand using maxnum intrinsic?
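; For now the fmax reduction (and the fmin reduction below) is not expanded and
; is left as a call to the intrinsic.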

define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Expand using minnum intrinsic?

define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Why is this not expanded?

; Test when the vector size is not a power of two.
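; Presumably the shuffle-based expansion requires a power-of-two number of
; elements, so the <3 x i8> reduction is left as an intrinsic call (note the
; call is remangled to @llvm.vector.reduce.and.v3i8 in the output).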
define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: @test_v3i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT:    ret i8 [[B]]
;
entry:
  %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
  ret i8 %b
}