; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; Extracting one element of a loaded vector. In each of the five load tests
; below (plain, fp, volatile, extra use, variable index) the expected output
; keeps the vector load followed by the extractelement.

define i32 @extract_load(<4 x i32>* %p) {
; CHECK-LABEL: @extract_load(
; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
; CHECK-NEXT:    ret i32 [[EXT]]
;
  %x = load <4 x i32>, <4 x i32>* %p, align 4
  %ext = extractelement <4 x i32> %x, i32 1
  ret i32 %ext
}

define double @extract_load_fp(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p, align 32
  %ext = extractelement <4 x double> %x, i32 3
  ret double %ext
}

define double @extract_load_volatile(<4 x double>* %p) {
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load volatile <4 x double>, <4 x double>* %p
  %ext = extractelement <4 x double> %x, i32 2
  ret double %ext
}

define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
; CHECK-NEXT:    store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p, align 8
  %ext = extractelement <4 x double> %x, i32 0
  store <4 x double> %x, <4 x double>* %p2
  ret double %ext
}

define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p
  %ext = extractelement <4 x double> %x, i32 %y
  ret double %ext
}

; The <4 x float> loop phi is only read through an extractelement; the expected
; output carries a scalar float phi and a scalar fmul instead.

define void @scalarize_phi(i32 * %n, float * %inout) {
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.body:
; CHECK-NEXT:    store volatile float [[TMP0]], float* [[INOUT]], align 4
; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %t0 = load volatile float, float * %inout, align 4
  %insert = insertelement <4 x float> undef, float %t0, i32 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
  br label %for.cond

for.cond:
  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %t1 = load i32, i32 * %n, align 4
  %cmp = icmp ne i32 %i.0, %t1
  br i1 %cmp, label %for.body, label %for.end

for.body:
  %t2 = extractelement <4 x float> %x.0, i32 1
  store volatile float %t2, float * %inout, align 4
  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret void
}

; Extract of a binop with a constant vector operand. With a constant index
; (splat, splat-with-undef, or non-splat constant) the expected output is an
; extract of the variable operand feeding a scalar binop.

define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT:    ret float [[R]]
;
  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %r = extractelement <4 x float> %b, i32 2
  ret float %r
}

define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT:    ret double [[R]]
;
  %b = fdiv <2 x double> <double 42.0, double undef>, %x
  %r = extractelement <2 x double> %b, i32 0
  ret double %r
}

define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT:    ret float [[R]]
;
  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
  %r = extractelement <2 x float> %b, i32 1
  ret float %r
}

; Same pattern with a variable index: the full splat constant is expected to be
; scalarized, while the constants containing an undef lane are expected to be
; left as vector operations.

define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; Binop of an argument and a loaded vector: expected to become two extracts and
; a scalar fadd.

define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, <4 x float>* %ptr
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; Here the load has a second use (the store); the expected output keeps the
; vector fadd and the extract of its result.

define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
; CHECK-NEXT:    store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, <4 x float>* %ptr0
  store <4 x float> %load, <4 x float>* %ptr1
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT:    ret float [[R]]
;
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = extractelement <4 x float> %add, i32 %y
  ret float %r
}

; The extracted lane of one binop operand is the scalar that was just inserted;
; the expected scalar fmul uses it directly and keeps the nnan flag.

define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
; CHECK-NEXT:    ret float [[E]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = fmul nnan <4 x float> %C, %B
  %E = extractelement <4 x float> %D, i32 0
  ret float %E
}

; We recurse to find a scalarizable operand.
; FIXME: We should propagate the IR flags including wrapping flags.
define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
; CHECK-NEXT:    [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i32 [[E]]
;
  %v = insertelement <4 x i32> %A, i32 %f, i32 0
  %C = add <4 x i32> %v, %B
  %D = mul nsw <4 x i32> %C, %B
  %E = extractelement <4 x i32> %D, i32 0
  ret i32 %E
}

; Variable-index extract from a constant vector: expected to be left unchanged.

define float @extract_element_constant_vector_variable_index(i32 %y) {
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
  ret float %r
}

; Extract of an 'and' whose operand is a vector compare against zero: expected
; to become a scalar compare of the extracted element and a scalar 'and'.

define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = icmp eq <4 x i32> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Extract directly from a vector compare with a constant right-hand side, using
; a constant index and then a variable index: expected to become an extract of
; the argument followed by a scalar compare.

define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; The fadd feeding the compare is also used by a volatile store; the expected
; output keeps the vector fcmp and the extract of its result.
; NOTE(review): %idx is declared but unused here (the extract uses constant
; index 0) - confirm that is intentional.

define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT:    store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %add = fadd <2 x float> %arg1, %arg2
  store volatile <2 x float> %add, <2 x float>* undef
  %cmp = fcmp oeq <2 x float> %arg0, %add
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}