; Test lowering of the experimental vector-reduce min/max intrinsics on AArch64:
; single-vector reductions must select the across-lanes reduction instructions
; (smaxv/umaxv/sminv/uminv, and fmaxnmv/fminnmv for nnan float), while
; "oversized" 256/512-bit inputs must first be pairwise-reduced with
; smax/umax/smin/umin and finish with a single across-lanes reduction.
; RUN: llc < %s -mtriple=aarch64-linux--gnu -aarch64-neon-syntax=generic | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)

declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)

declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)

; CHECK-LABEL: smax_B
; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: smax_H
; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: smax_S
; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: umax_B
; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: umax_H
; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: umax_S
; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: smin_B
; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: smin_H
; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: smin_S
; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; CHECK-LABEL: umin_B
; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; CHECK-LABEL: umin_H
; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; CHECK-LABEL: umin_S
; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; The nnan flag permits lowering fmax/fmin to the NaN-propagating-free
; fmaxnmv/fminnmv across-lanes instructions.
; CHECK-LABEL: fmaxnm_S
; CHECK: fmaxnmv
define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}

; CHECK-LABEL: fminnm_S
; CHECK: fminnmv
define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}

; 256-bit input: one pairwise umax of the two halves, then a single umaxv.
declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)

define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_256
; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: umaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

; 512-bit input: a tree of three pairwise umax ops, then a single umaxv.
declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)

define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_512
; CHECK: umax v
; CHECK-NEXT: umax v
; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)

define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_256
; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)

define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_512
; CHECK: umin v
; CHECK-NEXT: umin v
; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)

define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_256
; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: smaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)

define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_512
; CHECK: smax v
; CHECK-NEXT: smax v
; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)

define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_256
; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: sminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)

define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_512
; CHECK: smin v
; CHECK-NEXT: smin v
; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}