; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s

; SLP vectorizer test for chains of adjacent load / icmp / select / store
; operations that clamp every element against the constant 16383.
;
; Each function below processes consecutive elements reachable from %ptr. When
; all lanes use the same compare predicate, the assertions expect the scalar
; chain to be replaced by one vector load, one vector icmp, one vector select
; and one vector store. When the predicates differ between lanes, the
; assertions expect the scalar code to be left untouched.
;
; The assertions are autogenerated; after changing the input IR, regenerate
; them with utils/update_test_checks.py instead of editing them by hand.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

; 8 x i16 lanes, uniform 'ult' predicate: expected to become <8 x i16> operations.
define void @select_umin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp ult i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp ult i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp ult i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp ult i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp ult i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp ult i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp ult i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp ult i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'ult' predicate: expected to become <4 x i32> operations.
define void @select_umin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp ult i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp ult i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; 4 x i32 lanes whose predicates alternate between 'ult' and 'ugt' (the
; function name says "ule", but no 'ule' compare appears in the body). The
; assertions expect all four lanes to stay scalar.
define void @select_ule_ugt_mix_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; 2 x i64 lanes, uniform 'ult' predicate; the scalar stores are only 4-byte
; aligned and the expected vector store keeps that alignment.
define void @select_umin_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp ult i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp ult i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}

; 8 x i16 lanes, uniform 'ule' predicate: expected to become <8 x i16> operations.
define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_ule_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp ule i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp ule i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp ule i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp ule i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp ule i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp ule i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp ule i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp ule i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'ule' predicate: expected to become <4 x i32> operations.
define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_ule_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp ule i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp ule i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp ule i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp ule i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; 2 x i64 lanes, uniform 'ule' predicate.
define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_ule_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp ule i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp ule i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}

; 8 x i16 lanes, uniform 'slt' predicate: expected to become <8 x i16> operations.
define void @select_smin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp slt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp slt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp slt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp slt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp slt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp slt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp slt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp slt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'slt' predicate: expected to become <4 x i32> operations.
define void @select_smin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp slt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp slt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp slt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp slt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; 2 x i64 lanes, uniform 'slt' predicate.
define void @select_smin_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp slt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp slt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}

; 8 x i16 lanes, uniform 'sle' predicate: expected to become <8 x i16> operations.
define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_sle_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp sle i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp sle i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp sle i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp sle i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp sle i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp sle i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp sle i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp sle i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'sle' predicate: expected to become <4 x i32> operations.
define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_sle_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp sle i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp sle i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp sle i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp sle i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; 2 x i64 lanes, uniform 'sle' predicate.
define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_sle_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp sle i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp sle i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}

; 8 x i16 lanes, uniform 'ugt' predicate: expected to become <8 x i16> operations.
define void @select_umax_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp ugt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp ugt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp ugt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp ugt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp ugt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp ugt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp ugt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp ugt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'ugt' predicate: expected to become <4 x i32> operations.
define void @select_umax_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp ugt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, i32* %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
  %l.2 = load i32, i32* %gep.2
  %cmp.2 = icmp ugt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, i32* %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
  %l.3 = load i32, i32* %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, i32* %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
; 2 x i64 lanes, uniform 'ugt' predicate.
define void @select_umax_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp ugt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp ugt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}

; 8 x i16 lanes, uniform 'uge' predicate: expected to become <8 x i16> operations.
define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_uge_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, i16* %ptr
  %cmp.0 = icmp uge i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, i16* %ptr, align 2

  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
  %l.1 = load i16, i16* %gep.1
  %cmp.1 = icmp uge i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, i16* %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
  %l.2 = load i16, i16* %gep.2
  %cmp.2 = icmp uge i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, i16* %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
  %l.3 = load i16, i16* %gep.3
  %cmp.3 = icmp uge i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, i16* %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
  %l.4 = load i16, i16* %gep.4
  %cmp.4 = icmp uge i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, i16* %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
  %l.5 = load i16, i16* %gep.5
  %cmp.5 = icmp uge i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, i16* %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
  %l.6 = load i16, i16* %gep.6
  %cmp.6 = icmp uge i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, i16* %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
  %l.7 = load i16, i16* %gep.7
  %cmp.7 = icmp uge i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, i16* %gep.7, align 2
  ret void
}

; 4 x i32 lanes, uniform 'uge' predicate: expected to become <4 x i32> operations.
define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_uge_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, i32* %ptr
  %cmp.0 = icmp uge i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, i32* %ptr, align 4

  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
  %l.1 = load i32, i32* %gep.1
  %cmp.1 = icmp uge i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32
%l.1, i32 16383 841 store i32 %s.1, i32* %gep.1, align 4 842 843 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2 844 %l.2 = load i32, i32* %gep.2 845 %cmp.2 = icmp uge i32 %l.2, 16383 846 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383 847 store i32 %s.2, i32* %gep.2, align 4 848 849 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3 850 %l.3 = load i32, i32* %gep.3 851 %cmp.3 = icmp uge i32 %l.3, 16383 852 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383 853 store i32 %s.3, i32* %gep.3, align 4 854 855 ret void 856} 857 858; There is no <2 x i64> version of umax, but we can efficiently lower 859; compare/select pairs with uniform predicates. 860define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) { 861; CHECK-LABEL: @select_umax_uge_2xi64( 862; CHECK-NEXT: entry: 863; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 864; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* 865; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 866; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383> 867; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383> 868; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* 869; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4 870; CHECK-NEXT: ret void 871; 872entry: 873 %l.0 = load i64, i64* %ptr 874 %cmp.0 = icmp uge i64 %l.0, 16383 875 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383 876 store i64 %s.0, i64* %ptr, align 4 877 878 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1 879 %l.1 = load i64, i64* %gep.1 880 %cmp.1 = icmp uge i64 %l.1, 16383 881 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383 882 store i64 %s.1, i64* %gep.1, align 4 883 884 ret void 885} 886 887define void @select_smax_8xi16(i16* %ptr, i16 %x) { 888; CHECK-LABEL: @select_smax_8xi16( 889; CHECK-NEXT: entry: 890; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 891; 
CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 892; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 893; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 894; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 895; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 896; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 897; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* 898; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 899; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> 900; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> 901; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* 902; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 903; CHECK-NEXT: ret void 904; 905entry: 906 %l.0 = load i16, i16* %ptr 907 %cmp.0 = icmp sgt i16 %l.0, 16383 908 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 909 store i16 %s.0, i16* %ptr, align 2 910 911 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 912 %l.1 = load i16, i16* %gep.1 913 %cmp.1 = icmp sgt i16 %l.1, 16383 914 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383 915 store i16 %s.1, i16* %gep.1, align 2 916 917 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2 918 %l.2 = load i16, i16* %gep.2 919 %cmp.2 = icmp sgt i16 %l.2, 16383 920 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383 921 store i16 %s.2, i16* %gep.2, align 2 922 923 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3 924 %l.3 = load i16, i16* %gep.3 925 %cmp.3 = icmp sgt i16 %l.3, 16383 926 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383 927 store i16 %s.3, i16* %gep.3, align 2 928 929 %gep.4 = getelementptr inbounds 
i16, i16* %ptr, i16 4 930 %l.4 = load i16, i16* %gep.4 931 %cmp.4 = icmp sgt i16 %l.4, 16383 932 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383 933 store i16 %s.4, i16* %gep.4, align 2 934 935 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5 936 %l.5 = load i16, i16* %gep.5 937 %cmp.5 = icmp sgt i16 %l.5, 16383 938 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383 939 store i16 %s.5, i16* %gep.5, align 2 940 941 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6 942 %l.6 = load i16, i16* %gep.6 943 %cmp.6 = icmp sgt i16 %l.6, 16383 944 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383 945 store i16 %s.6, i16* %gep.6, align 2 946 947 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7 948 %l.7 = load i16, i16* %gep.7 949 %cmp.7 = icmp sgt i16 %l.7, 16383 950 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383 951 store i16 %s.7, i16* %gep.7, align 2 952 ret void 953} 954 955define void @select_smax_4xi32(i32* %ptr, i32 %x) { 956; CHECK-LABEL: @select_smax_4xi32( 957; CHECK-NEXT: entry: 958; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 959; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 960; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 961; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* 962; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 963; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> 964; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> 965; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* 966; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 967; CHECK-NEXT: ret void 968; 969entry: 970 %l.0 = load i32, i32* %ptr 971 %cmp.0 = icmp sgt i32 %l.0, 16383 972 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 973 store i32 %s.0, i32* %ptr, align 4 974 975 %gep.1 = getelementptr inbounds 
i32, i32* %ptr, i32 1 976 %l.1 = load i32, i32* %gep.1 977 %cmp.1 = icmp sgt i32 %l.1, 16383 978 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383 979 store i32 %s.1, i32* %gep.1, align 4 980 981 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2 982 %l.2 = load i32, i32* %gep.2 983 %cmp.2 = icmp sgt i32 %l.2, 16383 984 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383 985 store i32 %s.2, i32* %gep.2, align 4 986 987 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3 988 %l.3 = load i32, i32* %gep.3 989 %cmp.3 = icmp sgt i32 %l.3, 16383 990 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383 991 store i32 %s.3, i32* %gep.3, align 4 992 993 ret void 994} 995 996; There is no <2 x i64> version of smax, but we can efficiently lower 997; compare/select pairs with uniform predicates. 998define void @select_smax_2xi64(i64* %ptr, i64 %x) { 999; CHECK-LABEL: @select_smax_2xi64( 1000; CHECK-NEXT: entry: 1001; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1 1002; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* 1003; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 1004; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383> 1005; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383> 1006; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* 1007; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4 1008; CHECK-NEXT: ret void 1009; 1010entry: 1011 %l.0 = load i64, i64* %ptr 1012 %cmp.0 = icmp sgt i64 %l.0, 16383 1013 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383 1014 store i64 %s.0, i64* %ptr, align 4 1015 1016 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1 1017 %l.1 = load i64, i64* %gep.1 1018 %cmp.1 = icmp sgt i64 %l.1, 16383 1019 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383 1020 store i64 %s.1, i64* %gep.1, align 4 1021 1022 ret void 1023} 1024 1025 1026define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) { 
1027; CHECK-LABEL: @select_smax_sge_8xi16( 1028; CHECK-NEXT: entry: 1029; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 1030; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 1031; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 1032; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 1033; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 1034; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 1035; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 1036; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* 1037; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 1038; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> 1039; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> 1040; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* 1041; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 1042; CHECK-NEXT: ret void 1043; 1044entry: 1045 %l.0 = load i16, i16* %ptr 1046 %cmp.0 = icmp sge i16 %l.0, 16383 1047 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 1048 store i16 %s.0, i16* %ptr, align 2 1049 1050 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 1051 %l.1 = load i16, i16* %gep.1 1052 %cmp.1 = icmp sge i16 %l.1, 16383 1053 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383 1054 store i16 %s.1, i16* %gep.1, align 2 1055 1056 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2 1057 %l.2 = load i16, i16* %gep.2 1058 %cmp.2 = icmp sge i16 %l.2, 16383 1059 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383 1060 store i16 %s.2, i16* %gep.2, align 2 1061 1062 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3 1063 %l.3 
= load i16, i16* %gep.3 1064 %cmp.3 = icmp sge i16 %l.3, 16383 1065 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383 1066 store i16 %s.3, i16* %gep.3, align 2 1067 1068 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4 1069 %l.4 = load i16, i16* %gep.4 1070 %cmp.4 = icmp sge i16 %l.4, 16383 1071 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383 1072 store i16 %s.4, i16* %gep.4, align 2 1073 1074 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5 1075 %l.5 = load i16, i16* %gep.5 1076 %cmp.5 = icmp sge i16 %l.5, 16383 1077 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383 1078 store i16 %s.5, i16* %gep.5, align 2 1079 1080 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6 1081 %l.6 = load i16, i16* %gep.6 1082 %cmp.6 = icmp sge i16 %l.6, 16383 1083 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383 1084 store i16 %s.6, i16* %gep.6, align 2 1085 1086 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7 1087 %l.7 = load i16, i16* %gep.7 1088 %cmp.7 = icmp sge i16 %l.7, 16383 1089 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383 1090 store i16 %s.7, i16* %gep.7, align 2 1091 ret void 1092} 1093 1094define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) { 1095; CHECK-LABEL: @select_smax_sge_4xi32( 1096; CHECK-NEXT: entry: 1097; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 1098; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 1099; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 1100; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* 1101; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1102; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> 1103; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> 1104; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* 1105; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 
4 1106; CHECK-NEXT: ret void 1107; 1108entry: 1109 %l.0 = load i32, i32* %ptr 1110 %cmp.0 = icmp sge i32 %l.0, 16383 1111 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 1112 store i32 %s.0, i32* %ptr, align 4 1113 1114 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1 1115 %l.1 = load i32, i32* %gep.1 1116 %cmp.1 = icmp sge i32 %l.1, 16383 1117 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383 1118 store i32 %s.1, i32* %gep.1, align 4 1119 1120 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2 1121 %l.2 = load i32, i32* %gep.2 1122 %cmp.2 = icmp sge i32 %l.2, 16383 1123 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383 1124 store i32 %s.2, i32* %gep.2, align 4 1125 1126 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3 1127 %l.3 = load i32, i32* %gep.3 1128 %cmp.3 = icmp sge i32 %l.3, 16383 1129 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383 1130 store i32 %s.3, i32* %gep.3, align 4 1131 1132 ret void 1133} 1134 1135; There is no <2 x i64> version of smax, but we can efficiently lower 1136; compare/select pairs with uniform predicates. 
; Signed-max clamp of two adjacent i64 elements against the constant 16383,
; written as an `icmp sge` + `select` pair per element. Each element is
; loaded, compared, selected and stored back in place. The assertions below
; expect the two scalar chains to be merged into a single <2 x i64>
; load / icmp sge / select / store sequence.
;
; The scalar loads carry no explicit alignment while the scalar stores use
; `align 4`; the expected vector load is `align 8` and the expected vector
; store keeps `align 4`. The %x argument is not referenced by the body.
define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_sge_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Element 0: the value at %ptr itself.
  %l.0 = load i64, i64* %ptr
  %cmp.0 = icmp sge i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, i64* %ptr, align 4

  ; Element 1: the value one i64 past %ptr, same compare/select shape.
  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
  %l.1 = load i64, i64* %gep.1
  %cmp.1 = icmp sge i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, i64* %gep.1, align 4

  ret void
}