; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions

; Intrinsic declarations under test.
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)

declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)

declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)

declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)

; Integer binary-op reductions over <2 x i64> expand to one shuffle plus the
; scalar binop on vectors, followed by an extract of element 0.

define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FP reductions with the 'fast' flag expand to a log2(N) shuffle/op tree,
; with the scalar accumulator folded in by one final scalar op.

define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Without fast-math flags the reduction is ordered: it is scalarized into a
; sequential chain of extracts and scalar ops, starting from the accumulator.

define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Integer min/max reductions expand to a shuffle followed by icmp + select.

define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FP min/max reductions are currently left unexpanded, as checked below.

; FIXME: Expand using maxnum intrinsic?

define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Expand using minnum intrinsic?

define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Why is this not expanded?

; Test when the vector size is not power of two.
; NOTE(review): the CHECK line shows the call remangled to
; @llvm.vector.reduce.and.v3i8 (without the extra ".i8"), but the reduction
; itself is not expanded.
define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: @test_v3i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT:    ret i8 [[B]]
;
entry:
  %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
  ret i8 %b
}