; RUN: opt -S -instcombine < %s | FileCheck %s

; Tests for how instcombine treats extractelement applied to the result of a
; select between two <4 x float> vectors.

; External consumer of a whole <4 x float>. The *_user tests call it so that the
; select has a use that is not an extractelement.
declare void @v4float_user(<4 x float>) #0

; Scalar-condition select with a single extractelement user. The expected output
; keeps the select on the full vectors (compare inverted to eq, arms swapped)
; and extracts lane 2 from its result.
define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %c, 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  ret float %extract
}

; Multiple extractelements
; Lanes 1 and 2 of the select are extracted and re-inserted into a <2 x float>;
; the expected output folds both extract/insert pairs into one shufflevector
; with mask <1, 2>.
define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_two_select(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %c, 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    ret <2 x float> [[BUILD2]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract1 = extractelement <4 x float> %sel, i32 1
  %extract2 = extractelement <4 x float> %sel, i32 2
  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
  ret <2 x float> %build2
}

; Select has an extra non-extractelement user, don't change it
define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select_user(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %c, 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  call void @v4float_user(<4 x float> %sel)
  ret float %extract
}

; Vector-condition (<4 x i1>) variant of the extra-user case: the select result
; is also passed to @v4float_user, and the expected output keeps the select,
; the extractelement and the call.
define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect_user(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  call void @v4float_user(<4 x float> %sel)
  ret float %extract
}

; Do not convert the vector select into a scalar select. That would increase
; the instruction count and potentially obfuscate a vector min/max idiom.
; Vector-condition select with a single extractelement user. The expected output
; keeps the <4 x i1> select (compare inverted to eq, arms swapped) and extracts
; lane 0 from its result.
define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
; CHECK-NEXT:    [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %select, i32 0
  ret float %extract
}

; Multiple extractelements from a vector select
; Lanes 1 and 2 are extracted and re-inserted into a <2 x float>; the expected
; output folds both extract/insert pairs into one shufflevector with mask <1, 2>.
define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_two_vselect(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    ret <2 x float> [[BUILD2]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract1 = extractelement <4 x float> %sel, i32 1
  %extract2 = extractelement <4 x float> %sel, i32 2
  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
  ret <2 x float> %build2
}

; The vector selects are not decomposed into scalar selects because that would increase
; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
; Test multiple extractelements
; Lane i of the result comes from lane i of a select between %a and %b whose i1
; condition is lane i of %c compared against zero. The expected output keeps the
; four selects on the full vectors and merges the chosen lanes with three
; shufflevector instructions in place of the extractelement/insertelement pairs.
define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_vector_select(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> %c, i32 0
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    [[A_SINK:%.*]] = select i1 [[TOBOOL]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[A_SINK1:%.*]] = select i1 [[TOBOOL1]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[A_SINK2:%.*]] = select i1 [[TOBOOL6]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> %c, i32 3
; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT:    [[A_SINK3:%.*]] = select i1 [[TOBOOL11]], <4 x float> %b, <4 x float> %a
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP6]]
;
entry:
  %0 = extractelement <4 x i32> %c, i32 0
  %tobool = icmp ne i32 %0, 0
  %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
  %1 = extractelement <4 x float> %a.sink, i32 0
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = extractelement <4 x i32> %c, i32 1
  %tobool1 = icmp ne i32 %3, 0
  %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
  %4 = extractelement <4 x float> %a.sink1, i32 1
  %5 = insertelement <4 x float> %2, float %4, i32 1
  %6 = extractelement <4 x i32> %c, i32 2
  %tobool6 = icmp ne i32 %6, 0
  %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
  %7 = extractelement <4 x float> %a.sink2, i32 2
  %8 = insertelement <4 x float> %5, float %7, i32 2
  %9 = extractelement <4 x i32> %c, i32 3
  %tobool11 = icmp ne i32 %9, 0
  %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
  %10 = extractelement <4 x float> %a.sink3, i32 3
  %11 = insertelement <4 x float> %8, float %10, i32 3
  ret <4 x float> %11
}

; Attribute group referenced as #0 by the declaration and function definitions
; in this file.
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }