1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -instcombine -S < %s | FileCheck %s
3
; Extract a constant lane (index 1) from a plain, single-use <4 x i32> load.
; The CHECK lines expect the vector load and the extractelement to come out
; of instcombine exactly as written.
4define i32 @extract_load(<4 x i32>* %p) {
5; CHECK-LABEL: @extract_load(
6; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
7; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
8; CHECK-NEXT:    ret i32 [[EXT]]
9;
10  %x = load <4 x i32>, <4 x i32>* %p, align 4
11  %ext = extractelement <4 x i32> %x, i32 1
12  ret i32 %ext
13}
14
; Floating-point variant of the load + constant-index extract pattern:
; lane 3 of a <4 x double> load with align 32.
; The CHECK lines expect the load and the extractelement to stay unchanged.
15define double @extract_load_fp(<4 x double>* %p) {
16; CHECK-LABEL: @extract_load_fp(
17; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
18; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
19; CHECK-NEXT:    ret double [[EXT]]
20;
21  %x = load <4 x double>, <4 x double>* %p, align 32
22  %ext = extractelement <4 x double> %x, i32 3
23  ret double %ext
24}
25
; Constant-index extract (lane 2) from a *volatile* vector load.
; The input load carries no explicit alignment; the CHECK lines expect it to
; be printed with align 32, to remain volatile, and to still feed an
; unchanged extractelement.
26define double @extract_load_volatile(<4 x double>* %p) {
27; CHECK-LABEL: @extract_load_volatile(
28; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
29; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
30; CHECK-NEXT:    ret double [[EXT]]
31;
32  %x = load volatile <4 x double>, <4 x double>* %p
33  %ext = extractelement <4 x double> %x, i32 2
34  ret double %ext
35}
36
; The loaded vector has two uses: the lane-0 extract and a full-vector store
; to %p2. The CHECK lines expect the load (align 8), the extractelement and
; the store to all remain; the store, written without an alignment in the
; input, is expected to be printed with align 32.
37define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
38; CHECK-LABEL: @extract_load_extra_use(
39; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
40; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
41; CHECK-NEXT:    store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
42; CHECK-NEXT:    ret double [[EXT]]
43;
44  %x = load <4 x double>, <4 x double>* %p, align 8
45  %ext = extractelement <4 x double> %x, i32 0
46  store <4 x double> %x, <4 x double>* %p2
47  ret double %ext
48}
49
; Extract from a vector load using a run-time lane index (%y).
; The CHECK lines expect the load (printed with align 32, since the input
; gives no alignment) and the variable-index extractelement to stay as is.
50define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
51; CHECK-LABEL: @extract_load_variable_index(
52; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
53; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
54; CHECK-NEXT:    ret double [[EXT]]
55;
56  %x = load <4 x double>, <4 x double>* %p
57  %ext = extractelement <4 x double> %x, i32 %y
58  ret double %ext
59}
60
; Loop-carried vector phi whose only non-recurrence use is a lane-1 extract.
;
; Input: %x.0 starts as a splat of the volatile-loaded scalar %t0 and is
; multiplied by a splat constant on every iteration; for.body extracts lane 1
; and volatile-stores it to %inout. %insert1 is defined in the entry block but
; never used.
;
; Expected output (per the CHECK lines):
;   - the <4 x float> phi becomes a float phi seeded directly with %t0;
;   - the vector fmul becomes a scalar fmul by the same constant;
;   - the insertelement/shufflevector/extractelement instructions are gone;
;   - the loop test is printed as `icmp eq` with the branch targets swapped;
;   - the induction increment is printed as `add nuw nsw`.
61define void @scalarize_phi(i32 * %n, float * %inout) {
62; CHECK-LABEL: @scalarize_phi(
63; CHECK-NEXT:  entry:
64; CHECK-NEXT:    [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
65; CHECK-NEXT:    br label [[FOR_COND:%.*]]
66; CHECK:       for.cond:
67; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
68; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
69; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
70; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_0]], [[T1]]
71; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
72; CHECK:       for.body:
73; CHECK-NEXT:    store volatile float [[TMP0]], float* [[INOUT]], align 4
74; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
75; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
76; CHECK-NEXT:    br label [[FOR_COND]]
77; CHECK:       for.end:
78; CHECK-NEXT:    ret void
79;
80entry:
81  %t0 = load volatile float, float * %inout, align 4
82  %insert = insertelement <4 x float> undef, float %t0, i32 0
83  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
84  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
85  br label %for.cond
86
87for.cond:
88  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
89  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
90  %t1 = load i32, i32 * %n, align 4
91  %cmp = icmp ne i32 %i.0, %t1
92  br i1 %cmp, label %for.body, label %for.end
93
94for.body:
95  %t2 = extractelement <4 x float> %x.0, i32 1
96  store volatile float %t2, float * %inout, align 4
97  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
98  %inc = add nsw i32 %i.0, 1
99  br label %for.cond
100
101for.end:
102  ret void
103}
104
; Vector fadd with a splat constant, followed by a constant-index extract
; (lane 2). The CHECK lines expect the extract to be hoisted onto %x and the
; fadd to become a scalar fadd with the splat's element value.
105define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
106; CHECK-LABEL: @extract_element_binop_splat_constant_index(
107; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
108; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
109; CHECK-NEXT:    ret float [[R]]
110;
111  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
112  %r = extractelement <4 x float> %b, i32 2
113  ret float %r
114}
115
; Vector fdiv whose dividend is the constant <42.0, undef>, followed by an
; extract of lane 0 (the defined lane). The CHECK lines expect a scalar
; `fdiv 42.0, <lane 0 of %x>`, with the constant kept on the left-hand side.
116define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
117; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
118; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
119; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
120; CHECK-NEXT:    ret double [[R]]
121;
122  %b = fdiv <2 x double> <double 42.0, double undef>, %x
123  %r = extractelement <2 x double> %b, i32 0
124  ret double %r
125}
126
; Vector fmul by the non-splat constant <42.0, 43.0>, followed by an extract
; of lane 1. The CHECK lines expect a scalar fmul of lane 1 of %x by 43.0,
; i.e. the constant element at the extracted index.
127define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
128; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
129; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
130; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
131; CHECK-NEXT:    ret float [[R]]
132;
133  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
134  %r = extractelement <2 x float> %b, i32 1
135  ret float %r
136}
137
; Vector sdiv by a full splat of 42, followed by an extract at a run-time
; index (%y). The CHECK lines expect the variable-index extract to be applied
; to %x first and the sdiv to become a scalar sdiv by 42.
138define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
139; CHECK-LABEL: @extract_element_binop_splat_variable_index(
140; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
141; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
142; CHECK-NEXT:    ret i8 [[R]]
143;
144  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
145  %r = extractelement <4 x i8> %b, i32 %y
146  ret i8 %r
147}
148
; Vector mul by a constant that is 42 in every lane except an undef lane 2,
; followed by an extract at a run-time index (%y).
; Negative test: the CHECK lines expect the vector mul and the extractelement
; to remain unchanged.
149define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
150; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
151; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
152; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
153; CHECK-NEXT:    ret i8 [[R]]
154;
155  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
156  %r = extractelement <4 x i8> %b, i32 %y
157  ret i8 %r
158}
159
; Vector lshr by a non-splat constant (<4, 3, undef, 2>), followed by an
; extract at a run-time index (%y).
; Negative test: the CHECK lines expect the vector lshr and the
; extractelement to remain unchanged.
160define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
161; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
162; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
163; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
164; CHECK-NEXT:    ret i8 [[R]]
165;
166  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
167  %r = extractelement <4 x i8> %b, i32 %y
168  ret i8 %r
169}
170
; Vector fadd of an argument and a single-use vector load, followed by an
; extract of lane 2. The CHECK lines expect the vector load to remain
; (printed with align 16), lane 2 to be extracted from both the load and %x,
; and the fadd to become scalar with the loaded element as its first operand.
171define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
172; CHECK-LABEL: @extract_element_load(
173; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
174; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
175; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
176; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
177; CHECK-NEXT:    ret float [[R]]
178;
179  %load = load <4 x float>, <4 x float>* %ptr
180  %add = fadd <4 x float> %x, %load
181  %r = extractelement <4 x float> %add, i32 2
182  ret float %r
183}
184
; Same shape as @extract_element_load, but the loaded vector is also stored
; to %ptr1, so it has more than one use.
; Negative test: the CHECK lines expect the fadd to stay a vector operation
; (printed with the load as its first operand) and the extractelement to stay
; on the fadd result.
185define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
186; CHECK-LABEL: @extract_element_multi_Use_load(
187; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
188; CHECK-NEXT:    store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
189; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
190; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
191; CHECK-NEXT:    ret float [[R]]
192;
193  %load = load <4 x float>, <4 x float>* %ptr0
194  store <4 x float> %load, <4 x float>* %ptr1
195  %add = fadd <4 x float> %x, %load
196  %r = extractelement <4 x float> %add, i32 2
197  ret float %r
198}
199
; Vector fadd with a splat of 1.0, followed by an extract at a run-time index
; (%y). The CHECK lines expect the variable-index extract to be applied to %x
; and the fadd to become a scalar fadd with 1.0.
200define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
201; CHECK-LABEL: @extract_element_variable_index(
202; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
203; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
204; CHECK-NEXT:    ret float [[R]]
205;
206  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
207  %r = extractelement <4 x float> %add, i32 %y
208  ret float %r
209}
210
; %f is inserted into lane 0 of %A, the result is multiplied by %B with the
; `nnan` fast-math flag, and lane 0 of the product is extracted.
; The CHECK lines expect a scalar `fmul nnan` of lane 0 of %B with %f: the
; insertelement disappears, %A is no longer referenced, and the nnan flag is
; kept on the scalar fmul.
211define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
212; CHECK-LABEL: @extelt_binop_insertelt(
213; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
214; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
215; CHECK-NEXT:    ret float [[E]]
216;
217  %C = insertelement <4 x float> %A, float %f, i32 0
218  %D = fmul nnan <4 x float> %C, %B
219  %E = extractelement <4 x float> %D, i32 0
220  ret float %E
221}
222
223; We recurse to find a scalarizable operand.
224; FIXME: We should propagate the IR flags including wrapping flags.
; Input: insertelement of %f into lane 0 of %A, then a vector add with %B,
; then a vector `mul nsw` with %B, then an extract of lane 0.
; The CHECK lines expect both binops to be scalarized, with lane 0 of %B
; extracted twice. The scalar mul is expected WITHOUT the `nsw` flag that the
; input vector mul carries, which is the flag loss the FIXME above refers to.
225
226define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
227; CHECK-LABEL: @extelt_binop_binop_insertelt(
228; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
229; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
230; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
231; CHECK-NEXT:    [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
232; CHECK-NEXT:    ret i32 [[E]]
233;
234  %v = insertelement <4 x i32> %A, i32 %f, i32 0
235  %C = add <4 x i32> %v, %B
236  %D = mul nsw <4 x i32> %C, %B
237  %E = extractelement <4 x i32> %D, i32 0
238  ret i32 %E
239}
240
; Extract from a constant vector <1.0, 2.0, 3.0, 4.0> at a run-time index.
; The CHECK lines expect the extractelement to remain unchanged.
241define float @extract_element_constant_vector_variable_index(i32 %y) {
242; CHECK-LABEL: @extract_element_constant_vector_variable_index(
243; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
244; CHECK-NEXT:    ret float [[R]]
245;
246  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
247  ret i1 %r
248}
249
; Vector `icmp eq %x, 0` and-ed with a mask %y, followed by an extract of
; lane 2. The CHECK lines expect the whole chain to be scalarized: lane 2 is
; extracted from %x and from %y, the compare becomes a scalar icmp against 0,
; and the `and` becomes an i1 and.
250define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
251; CHECK-LABEL: @cheap_to_extract_icmp(
252; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
253; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
254; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
255; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
256; CHECK-NEXT:    ret i1 [[R]]
257;
258  %cmp = icmp eq <4 x i32> %x, zeroinitializer
259  %and = and <4 x i1> %cmp, %y
260  %r = extractelement <4 x i1> %and, i32 2
261  ret i1 %r
262}
263
; Floating-point counterpart of @cheap_to_extract_icmp: vector
; `fcmp oeq %x, 0.0` and-ed with %y, then lane 2 extracted.
; The CHECK lines expect a scalar fcmp oeq on lane 2 of %x, an extract of
; lane 2 of %y, and a scalar i1 and.
264define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
265; CHECK-LABEL: @cheap_to_extract_fcmp(
266; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
267; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
268; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
269; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
270; CHECK-NEXT:    ret i1 [[R]]
271;
272  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
273  %and = and <4 x i1> %cmp, %y
274  %r = extractelement <4 x i1> %and, i32 2
275  ret i1 %r
276}
277
; Vector icmp against a constant (zeroinitializer) right-hand side, then an
; extract of lane 0. The CHECK lines expect lane 0 of %arg to be extracted
; first and the compare to become a scalar `icmp eq ..., 0`.
278define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
279; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
280; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
281; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
282; CHECK-NEXT:    ret i1 [[EXT]]
283;
284  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
285  %ext = extractelement <2 x i1> %cmp, i32 0
286  ret i1 %ext
287}
288
; Vector fcmp oeq against a constant (zeroinitializer) right-hand side, then
; an extract of lane 0. The CHECK lines expect lane 0 of %arg to be extracted
; first and the compare to become a scalar `fcmp oeq ..., 0.0`.
289define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
290; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
291; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
292; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
293; CHECK-NEXT:    ret i1 [[EXT]]
294;
295  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
296  %ext = extractelement <2 x i1> %cmp, i32 0
297  ret i1 %ext
298}
299
; Same as @extractelt_vector_icmp_constrhs but with a run-time lane index
; (%idx). The CHECK lines expect the variable-index extract to be applied to
; %arg and the compare to become a scalar `icmp eq ..., 0`.
300define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
301; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
302; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
303; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
304; CHECK-NEXT:    ret i1 [[EXT]]
305;
306  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
307  %ext = extractelement <2 x i1> %cmp, i32 %idx
308  ret i1 %ext
309}
310
; Same as @extractelt_vector_fcmp_constrhs but with a run-time lane index
; (%idx). The CHECK lines expect the variable-index extract to be applied to
; %arg and the compare to become a scalar `fcmp oeq ..., 0.0`.
311define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
312; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
313; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
314; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
315; CHECK-NEXT:    ret i1 [[EXT]]
316;
317  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
318  %ext = extractelement <2 x i1> %cmp, i32 %idx
319  ret i1 %ext
320}
321
; Negative test: one fcmp operand (%add) has a second use, a volatile store
; of the whole vector, and the other operand is a non-constant argument.
; The CHECK lines expect the fcmp to stay a vector compare (printed with %add
; as the first operand) and the lane-0 extractelement to stay on its result.
; Note: the %idx parameter is not used by the body; the extract index is the
; constant 0.
322define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
323; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
324; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
325; CHECK-NEXT:    store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
326; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
327; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
328; CHECK-NEXT:    ret i1 [[EXT]]
329;
330  %add = fadd <2 x float> %arg1, %arg2
331  store volatile <2 x float> %add, <2 x float>* undef
332  %cmp = fcmp oeq <2 x float> %arg0, %add
333  %ext = extractelement <2 x i1> %cmp, i32 0
334  ret i1 %ext
335}
336