; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Codegen checks for horizontal reductions over vectors of 16-bit elements.
; Every function below reduces its vector argument with a shuffle tree: the
; upper part of the live lanes is shuffled down, combined with the lower part,
; and the step is repeated until lane 0 holds the result, which is extracted
; and returned.
;
; The gfx900 run checks for packed v_pk_* instructions followed by a single
; SDWA instruction; the fiji run checks for SDWA and e32 instructions only.
;
; Each function's checks define their own [[...]] variables rather than
; reusing captures left over from an earlier function, so the patterns do not
; depend on another function's register allocation.

; GCN-LABEL: {{^}}reduction_half4:
; GFX9:      v_pk_add_f16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; Fast-math fadd reduction of <4 x half>.
define half @reduction_half4(<4 x half> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = fadd fast <4 x half> %vec4, %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <4 x half> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  ret half %res
}

; GCN-LABEL: {{^}}reduction_v4i16:
; GFX9:      v_pk_add_u16 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_u16_sdwa
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; Integer add reduction of <4 x i16>.
define i16 @reduction_v4i16(<4 x i16> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = add <4 x i16> %vec4, %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x i16> %bin.rdx2, i32 0
  ret i16 %res
}

; GCN-LABEL: {{^}}reduction_half8:
; GFX9:      v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

; Fast-math fadd reduction of <8 x half>.
define half @reduction_half8(<8 x half> %vec8) {
entry:
  %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = fadd fast <8 x half> %vec8, %rdx.shuf
  %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <8 x half> %bin.rdx, %rdx.shuf1
  %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = fadd fast <8 x half> %bin.rdx2, %rdx.shuf3
  %res = extractelement <8 x half> %bin.rdx4, i32 0
  ret half %res
}

; GCN-LABEL: {{^}}reduction_v8i16:
; GFX9:      v_pk_add_u16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_u16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_u16_sdwa
; VI-NEXT: v_add_u16_sdwa
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32

; Integer add reduction of <8 x i16>.
define i16 @reduction_v8i16(<8 x i16> %vec8) {
entry:
  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i16> %vec8, %rdx.shuf
  %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
  %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
  %res = extractelement <8 x i16> %bin.rdx4, i32 0
  ret i16 %res
}

; GCN-LABEL: {{^}}reduction_half16:
; GFX9:      v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9:      v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD3:v[0-9]+]], [[ADD2]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa v{{[0-9]+}}, [[ADD3]], [[ADD3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_sdwa
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

; Fast-math fadd reduction of <16 x half>.
define half @reduction_half16(<16 x half> %vec16) {
entry:
  %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = fadd fast <16 x half> %vec16, %rdx.shuf
  %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <16 x half> %bin.rdx, %rdx.shuf1
  %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = fadd fast <16 x half> %bin.rdx2, %rdx.shuf3
  %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx6 = fadd fast <16 x half> %bin.rdx4, %rdx.shuf5
  %res = extractelement <16 x half> %bin.rdx6, i32 0
  ret half %res
}

; GCN-LABEL: {{^}}reduction_min_v4i16:
; GFX9:      v_pk_min_u16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_u16_sdwa
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; Unsigned-min reduction of <4 x i16>, written as icmp ult + select.
define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
  ret i16 %res
}

; GCN-LABEL: {{^}}reduction_umin_v8i16:
; GFX9:      v_pk_min_u16 [[MIN1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_u16 [[MIN2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_u16 [[MIN3:v[0-9]+]], [[MIN2]], [[MIN1]]{{$}}
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, [[MIN3]], [[MIN3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_u16_sdwa
; VI-NEXT: v_min_u16_sdwa
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; VI-NEXT: v_min_u16_e32
; Unsigned-min reduction of <8 x i16>, written as icmp ult + select.
define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
entry:
  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf
  %rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf
  %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1
  %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4
  %rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4
  %res = extractelement <8 x i16> %rdx.minmax.select6, i32 0
  ret i16 %res
}

; Checks that, without SLP vectorization, the same reduction takes more instructions.
; GCN-LABEL: {{^}}reduction_umin_v8i16_woslp:
; GFX9:      v_lshrrev_b32_e32
; GFX9-NEXT: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_u16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_u16
; GFX9-NEXT: v_min3_u16
; Unsigned-min reduction of <8 x i16> written as a scalar chain: every element
; is extracted and folded into a running minimum with icmp ult + select.
define i16 @reduction_umin_v8i16_woslp(<8 x i16> %vec8) {
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %cmp0 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp ult i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp ult i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp ult i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp ult i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  ret i16 %min7
}

; GCN-LABEL: {{^}}reduction_smin_v16i16:
; GFX9:      v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_min_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_sdwa
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; VI-NEXT: v_min_i16_e32
; Signed-min reduction of <16 x i16> as a shuffle tree of icmp slt + select;
; lane 0 of the final select is returned.
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
entry:
  %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf
  %rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf
  %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1
  %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4
  %rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4
  %rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7
  %rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7
  %res = extractelement <16 x i16> %rdx.minmax.select9, i32 0
  ret i16 %res
}

; Checks that, without SLP vectorization, the same reduction takes more instructions.
; GCN-LABEL: {{^}}reduction_smin_v16i16_woslp:
; GFX9:      v_lshrrev_b32_e32
; GFX9-NEXT: v_min_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_lshrrev_b32_e32
; GFX9-NEXT: v_min3_i16
; GFX9-NEXT: v_min3_i16
; Signed-min reduction of <16 x i16> written as a scalar chain: every element
; is extracted and folded into a running minimum with icmp slt + select.
define i16 @reduction_smin_v16i16_woslp(<16 x i16> %vec16) {
entry:
  %elt0 = extractelement <16 x i16> %vec16, i64 0
  %elt1 = extractelement <16 x i16> %vec16, i64 1
  %elt2 = extractelement <16 x i16> %vec16, i64 2
  %elt3 = extractelement <16 x i16> %vec16, i64 3
  %elt4 = extractelement <16 x i16> %vec16, i64 4
  %elt5 = extractelement <16 x i16> %vec16, i64 5
  %elt6 = extractelement <16 x i16> %vec16, i64 6
  %elt7 = extractelement <16 x i16> %vec16, i64 7

  %elt8 = extractelement <16 x i16> %vec16, i64 8
  %elt9 = extractelement <16 x i16> %vec16, i64 9
  %elt10 = extractelement <16 x i16> %vec16, i64 10
  %elt11 = extractelement <16 x i16> %vec16, i64 11
  %elt12 = extractelement <16 x i16> %vec16, i64 12
  %elt13 = extractelement <16 x i16> %vec16, i64 13
  %elt14 = extractelement <16 x i16> %vec16, i64 14
  %elt15 = extractelement <16 x i16> %vec16, i64 15

  %cmp0 = icmp slt i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp slt i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp slt i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp slt i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp slt i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp slt i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp slt i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  %cmp7 = icmp slt i16 %elt8, %min7
  %min8 = select i1 %cmp7, i16 %elt8, i16 %min7
  %cmp8 = icmp slt i16 %elt9, %min8
  %min9 = select i1 %cmp8, i16 %elt9, i16 %min8

  %cmp9 = icmp slt i16 %elt10, %min9
  %min10 = select i1 %cmp9, i16 %elt10, i16 %min9
  %cmp10 = icmp slt i16 %elt11, %min10
  %min11 = select i1 %cmp10, i16 %elt11, i16 %min10

  %cmp11 = icmp slt i16 %elt12, %min11
  %min12 = select i1 %cmp11, i16 %elt12, i16 %min11
  %cmp12 = icmp slt i16 %elt13, %min12
  %min13 = select i1 %cmp12, i16 %elt13, i16 %min12

  %cmp13 = icmp slt i16 %elt14, %min13
  %min14 = select i1 %cmp13, i16 %elt14, i16 %min13
  %cmp14 = icmp slt i16 %elt15, %min14
  %min15 = select i1 %cmp14, i16 %elt15, i16 %min14


  ret i16 %min15
}

; GCN-LABEL: {{^}}reduction_umax_v4i16:
; GFX9:      v_pk_max_u16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_u16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_u16_sdwa
; VI-NEXT: v_max_u16_e32
; VI-NEXT: v_max_u16_e32
; Unsigned-max reduction of <4 x i16> as a shuffle tree of icmp ugt + select.
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
  ret i16 %res
}

; GCN-LABEL: {{^}}reduction_smax_v4i16:
; GFX9:      v_pk_max_i16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_i16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_i16_sdwa
; VI-NEXT: v_max_i16_e32
; VI-NEXT: v_max_i16_e32
; Signed-max reduction of <4 x i16> as a shuffle tree of icmp sgt + select.
; NOTE(review): the dangling "#0" attribute-group reference was dropped because
; no "attributes #0" definition is visible in this file - confirm none exists.
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
  ret i16 %res
}

; GCN-LABEL: {{^}}reduction_fmax_v4half:
; GFX9:      v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_max_f16_sdwa
; VI-NEXT: v_max_f16_e32
; VI-NEXT: v_max_f16_e32
; Floating-point max reduction of <4 x half> as a shuffle tree of
; fcmp fast ogt + select.
define half @reduction_fmax_v4half(<4 x half> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = fcmp fast ogt <4 x half> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = fcmp fast ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
  %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
  ret half %res
}

; GCN-LABEL: {{^}}reduction_fmin_v4half:
; GFX9:      v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI:      v_min_f16_sdwa
; VI-NEXT: v_min_f16_e32
; VI-NEXT: v_min_f16_e32
; Floating-point min reduction of <4 x half> as a shuffle tree of
; fcmp fast olt + select.
define half @reduction_fmin_v4half(<4 x half> %vec4) {
entry:
  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.minmax.cmp = fcmp fast olt <4 x half> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.minmax.cmp2 = fcmp fast olt <4 x half> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
  %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
  ret half %res
}