1; RUN: opt -S -loop-vectorize -dce -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
2
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4
5@A = common global [1024 x i32] zeroinitializer, align 16
6@fA = common global [1024 x float] zeroinitializer, align 16
7@dA = common global [1024 x double] zeroinitializer, align 16
8
9; Signed tests.
10
11; Turn this into a max reduction. Make sure we use a splat to initialize the
12; vector for the reduction.
13; CHECK-LABEL: @max_red(
14; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
15; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
16; CHECK: icmp sgt <2 x i32>
17; CHECK: select <2 x i1>
18; CHECK: middle.block
19; CHECK: icmp sgt <2 x i32>
20; CHECK: select <2 x i1>
21
; Scalar input loop: visits the 1024 i32 elements of @A while carrying a running
; value in %max.red.08 that starts at the %max argument; the final carried
; value is returned.
22define i32 @max_red(i32 %max) {
23entry:
24  br label %for.body
25
26for.body:
27  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
28  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
29  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
30  %0 = load i32, i32* %arrayidx, align 4
  ; Signed max step: take the loaded element when it is greater than the
  ; carried value, otherwise keep the carried value.
31  %cmp3 = icmp sgt i32 %0, %max.red.08
32  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
33  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
34  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
35  %exitcond = icmp eq i32 %lftr.wideiv, 1024
36  br i1 %exitcond, label %for.end, label %for.body
37
38for.end:
39  ret i32 %max.red.0
40}
41
42; Turn this into a max reduction. The compare has its operands swapped and its
43; predicate inverted (the select is unchanged), so this is still a max reduction.
44; CHECK-LABEL: @max_red_inverse_select(
45; CHECK: icmp slt <2 x i32>
46; CHECK: select <2 x i1>
47; CHECK: middle.block
48; CHECK: icmp sgt <2 x i32>
49; CHECK: select <2 x i1>
50
; Same loop shape as a plain signed max over the 1024 i32 elements of @A, but
; the compare is written with the carried value on the left and the `slt`
; predicate.
51define i32 @max_red_inverse_select(i32 %max) {
52entry:
53  br label %for.body
54
55for.body:
56  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
57  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
58  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
59  %0 = load i32, i32* %arrayidx, align 4
  ; carried < loaded selects the loaded element, so the larger value survives.
60  %cmp3 = icmp slt i32 %max.red.08, %0
61  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
62  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
63  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
64  %exitcond = icmp eq i32 %lftr.wideiv, 1024
65  br i1 %exitcond, label %for.end, label %for.body
66
67for.end:
68  ret i32 %max.red.0
69}
70
71; Turn this into a min reduction.
72; CHECK-LABEL: @min_red(
73; CHECK: icmp slt <2 x i32>
74; CHECK: select <2 x i1>
75; CHECK: middle.block
76; CHECK: icmp slt <2 x i32>
77; CHECK: select <2 x i1>
78
; Scalar input loop: visits the 1024 i32 elements of @A while carrying a running
; value in %max.red.08 that starts at the %max argument (the names say "max"
; but the step below keeps the smaller value).
79define i32 @min_red(i32 %max) {
80entry:
81  br label %for.body
82
83for.body:
84  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
85  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
86  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
87  %0 = load i32, i32* %arrayidx, align 4
  ; Signed min step: take the loaded element when it is less than the carried
  ; value, otherwise keep the carried value.
88  %cmp3 = icmp slt i32 %0, %max.red.08
89  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
90  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
91  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
92  %exitcond = icmp eq i32 %lftr.wideiv, 1024
93  br i1 %exitcond, label %for.end, label %for.body
94
95for.end:
96  ret i32 %max.red.0
97}
98
99; Turn this into a min reduction. The compare has its operands swapped and its
100; predicate inverted (the select is unchanged), so this is still a min reduction.
101; CHECK-LABEL: @min_red_inverse_select(
102; CHECK: icmp sgt <2 x i32>
103; CHECK: select <2 x i1>
104; CHECK: middle.block
105; CHECK: icmp slt <2 x i32>
106; CHECK: select <2 x i1>
107
; Same loop shape as a plain signed min over the 1024 i32 elements of @A, but
; the compare is written with the carried value on the left and the `sgt`
; predicate.
108define i32 @min_red_inverse_select(i32 %max) {
109entry:
110  br label %for.body
111
112for.body:
113  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
114  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
115  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
116  %0 = load i32, i32* %arrayidx, align 4
  ; carried > loaded selects the loaded element, so the smaller value survives.
117  %cmp3 = icmp sgt i32 %max.red.08, %0
118  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
119  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
120  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
121  %exitcond = icmp eq i32 %lftr.wideiv, 1024
122  br i1 %exitcond, label %for.end, label %for.body
123
124for.end:
125  ret i32 %max.red.0
126}
127
128; Unsigned tests.
129
130; Turn this into a max reduction.
131; CHECK-LABEL: @umax_red(
132; CHECK: icmp ugt <2 x i32>
133; CHECK: select <2 x i1>
134; CHECK: middle.block
135; CHECK: icmp ugt <2 x i32>
136; CHECK: select <2 x i1>
137
; Unsigned counterpart of the signed max loop: visits the 1024 i32 elements of
; @A while carrying a running value that starts at the %max argument.
138define i32 @umax_red(i32 %max) {
139entry:
140  br label %for.body
141
142for.body:
143  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
144  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
145  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
146  %0 = load i32, i32* %arrayidx, align 4
  ; Unsigned max step: take the loaded element when it is greater (unsigned)
  ; than the carried value, otherwise keep the carried value.
147  %cmp3 = icmp ugt i32 %0, %max.red.08
148  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
149  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
150  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
151  %exitcond = icmp eq i32 %lftr.wideiv, 1024
152  br i1 %exitcond, label %for.end, label %for.body
153
154for.end:
155  ret i32 %max.red.0
156}
157
158; Turn this into a max reduction. The compare has its operands swapped and its
159; predicate inverted (the select is unchanged), so this is still a max reduction.
160; CHECK-LABEL: @umax_red_inverse_select(
161; CHECK: icmp ult <2 x i32>
162; CHECK: select <2 x i1>
163; CHECK: middle.block
164; CHECK: icmp ugt <2 x i32>
165; CHECK: select <2 x i1>
166
; Same loop shape as a plain unsigned max over the 1024 i32 elements of @A, but
; the compare is written with the carried value on the left and the `ult`
; predicate.
167define i32 @umax_red_inverse_select(i32 %max) {
168entry:
169  br label %for.body
170
171for.body:
172  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
173  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
174  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
175  %0 = load i32, i32* %arrayidx, align 4
  ; carried < loaded (unsigned) selects the loaded element, so the larger value
  ; survives.
176  %cmp3 = icmp ult i32 %max.red.08, %0
177  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
178  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
179  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
180  %exitcond = icmp eq i32 %lftr.wideiv, 1024
181  br i1 %exitcond, label %for.end, label %for.body
182
183for.end:
184  ret i32 %max.red.0
185}
186
187; Turn this into a min reduction.
188; CHECK-LABEL: @umin_red(
189; CHECK: icmp ult <2 x i32>
190; CHECK: select <2 x i1>
191; CHECK: middle.block
192; CHECK: icmp ult <2 x i32>
193; CHECK: select <2 x i1>
194
; Unsigned min loop: visits the 1024 i32 elements of @A while carrying a running
; value in %max.red.08 that starts at the %max argument (the names say "max"
; but the step below keeps the smaller value).
195define i32 @umin_red(i32 %max) {
196entry:
197  br label %for.body
198
199for.body:
200  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
201  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
202  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
203  %0 = load i32, i32* %arrayidx, align 4
  ; Unsigned min step: take the loaded element when it is less (unsigned) than
  ; the carried value, otherwise keep the carried value.
204  %cmp3 = icmp ult i32 %0, %max.red.08
205  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
206  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
207  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
208  %exitcond = icmp eq i32 %lftr.wideiv, 1024
209  br i1 %exitcond, label %for.end, label %for.body
210
211for.end:
212  ret i32 %max.red.0
213}
214
215; Turn this into a min reduction. The compare has its operands swapped and its
216; predicate inverted (the select is unchanged), so this is still a min reduction.
217; CHECK-LABEL: @umin_red_inverse_select(
218; CHECK: icmp ugt <2 x i32>
219; CHECK: select <2 x i1>
220; CHECK: middle.block
221; CHECK: icmp ult <2 x i32>
222; CHECK: select <2 x i1>
223
; Same loop shape as a plain unsigned min over the 1024 i32 elements of @A, but
; the compare is written with the carried value on the left and the `ugt`
; predicate.
224define i32 @umin_red_inverse_select(i32 %max) {
225entry:
226  br label %for.body
227
228for.body:
229  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
230  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
231  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
232  %0 = load i32, i32* %arrayidx, align 4
  ; carried > loaded (unsigned) selects the loaded element, so the smaller
  ; value survives.
233  %cmp3 = icmp ugt i32 %max.red.08, %0
234  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
235  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
236  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
237  %exitcond = icmp eq i32 %lftr.wideiv, 1024
238  br i1 %exitcond, label %for.end, label %for.body
239
240for.end:
241  ret i32 %max.red.0
242}
243
244; SGE -> SLT
245; Turn this into a min reduction (select inputs are reversed).
246; CHECK-LABEL: @sge_min_red(
247; CHECK: icmp sge <2 x i32>
248; CHECK: select <2 x i1>
249; CHECK: middle.block
250; CHECK: icmp slt <2 x i32>
251; CHECK: select <2 x i1>
252
; Signed min over the 1024 i32 elements of @A written with a non-strict `sge`
; compare and the select arms swapped (carried value first).
253define i32 @sge_min_red(i32 %max) {
254entry:
255  br label %for.body
256
257for.body:
258  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
259  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
260  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
261  %0 = load i32, i32* %arrayidx, align 4
  ; loaded >= carried keeps the carried value; otherwise the (smaller) loaded
  ; element is taken.
262  %cmp3 = icmp sge i32 %0, %max.red.08
263  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
264  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
265  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
266  %exitcond = icmp eq i32 %lftr.wideiv, 1024
267  br i1 %exitcond, label %for.end, label %for.body
268
269for.end:
270  ret i32 %max.red.0
271}
272
273; SLE -> SGT
274; Turn this into a max reduction (select inputs are reversed).
275; CHECK-LABEL: @sle_min_red(
276; CHECK: icmp sle <2 x i32>
277; CHECK: select <2 x i1>
278; CHECK: middle.block
279; CHECK: icmp sgt <2 x i32>
280; CHECK: select <2 x i1>
281
; Signed reduction over the 1024 i32 elements of @A written with a non-strict
; `sle` compare and the select arms swapped (carried value first).
; NOTE: despite "min" in the function name, the step below keeps the larger
; value, i.e. this loop computes a running signed maximum.
282define i32 @sle_min_red(i32 %max) {
283entry:
284  br label %for.body
285
286for.body:
287  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
288  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
289  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
290  %0 = load i32, i32* %arrayidx, align 4
  ; loaded <= carried keeps the carried value; otherwise the (larger) loaded
  ; element is taken.
291  %cmp3 = icmp sle i32 %0, %max.red.08
292  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
293  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
294  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
295  %exitcond = icmp eq i32 %lftr.wideiv, 1024
296  br i1 %exitcond, label %for.end, label %for.body
297
298for.end:
299  ret i32 %max.red.0
300}
301
302; UGE -> ULT
303; Turn this into a min reduction (select inputs are reversed).
304; CHECK-LABEL: @uge_min_red(
305; CHECK: icmp uge <2 x i32>
306; CHECK: select <2 x i1>
307; CHECK: middle.block
308; CHECK: icmp ult <2 x i32>
309; CHECK: select <2 x i1>
310
; Unsigned min over the 1024 i32 elements of @A written with a non-strict `uge`
; compare and the select arms swapped (carried value first).
311define i32 @uge_min_red(i32 %max) {
312entry:
313  br label %for.body
314
315for.body:
316  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
317  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
318  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
319  %0 = load i32, i32* %arrayidx, align 4
  ; loaded >= carried (unsigned) keeps the carried value; otherwise the
  ; (smaller) loaded element is taken.
320  %cmp3 = icmp uge i32 %0, %max.red.08
321  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
322  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
323  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
324  %exitcond = icmp eq i32 %lftr.wideiv, 1024
325  br i1 %exitcond, label %for.end, label %for.body
326
327for.end:
328  ret i32 %max.red.0
329}
330
331; ULE -> UGT
332; Turn this into a max reduction (select inputs are reversed).
333; CHECK-LABEL: @ule_min_red(
334; CHECK: icmp ule <2 x i32>
335; CHECK: select <2 x i1>
336; CHECK: middle.block
337; CHECK: icmp ugt <2 x i32>
338; CHECK: select <2 x i1>
339
; Unsigned reduction over the 1024 i32 elements of @A written with a non-strict
; `ule` compare and the select arms swapped (carried value first).
; NOTE: despite "min" in the function name, the step below keeps the larger
; value, i.e. this loop computes a running unsigned maximum.
340define i32 @ule_min_red(i32 %max) {
341entry:
342  br label %for.body
343
344for.body:
345  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
346  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
347  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
348  %0 = load i32, i32* %arrayidx, align 4
  ; loaded <= carried (unsigned) keeps the carried value; otherwise the
  ; (larger) loaded element is taken.
349  %cmp3 = icmp ule i32 %0, %max.red.08
350  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
351  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
352  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
353  %exitcond = icmp eq i32 %lftr.wideiv, 1024
354  br i1 %exitcond, label %for.end, label %for.body
355
356for.end:
357  ret i32 %max.red.0
358}
359
360; No reduction.
361; CHECK-LABEL: @no_red_1(
; A printed icmp always has a predicate between the opcode and the operand
; type, so the negative pattern has to allow for one; without it the pattern
; could never match and the test would pass vacuously.
362; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
; Negative test: the compare below is between two loaded values and does not
; involve the carried value %max.red.08, so the select is not a min/max of the
; carried value and the loaded element.
363define i32 @no_red_1(i32 %max) {
364entry:
365  br label %for.body
366
367for.body:
368  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
369  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
370  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  ; Second address uses a leading index of 1, i.e. it is offset by one whole
  ; [1024 x i32] from @A.
371  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
372  %0 = load i32, i32* %arrayidx, align 4
373  %1 = load i32, i32* %arrayidx1, align 4
  ; The condition compares the two loads; the select still chooses between the
  ; first load and the carried value.
374  %cmp3 = icmp sgt i32 %0, %1
375  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
376  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
377  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
378  %exitcond = icmp eq i32 %lftr.wideiv, 1024
379  br i1 %exitcond, label %for.end, label %for.body
380
381for.end:
382  ret i32 %max.red.0
383}
384
385; CHECK-LABEL: @no_red_2(
; A printed icmp always has a predicate between the opcode and the operand
; type, so the negative pattern has to allow for one; without it the pattern
; could never match and the test would pass vacuously.
386; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
; Negative test: the compare involves the carried value %max.red.08, but the
; select chooses between the two loaded values, so the carried value is never
; propagated as a min/max.
387define i32 @no_red_2(i32 %max) {
388entry:
389  br label %for.body
390
391for.body:
392  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
393  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
394  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  ; Second address uses a leading index of 1, i.e. it is offset by one whole
  ; [1024 x i32] from @A.
395  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
396  %0 = load i32, i32* %arrayidx, align 4
397  %1 = load i32, i32* %arrayidx1, align 4
  ; The condition compares the first load with the carried value, but the
  ; false arm of the select is the second load rather than the carried value.
398  %cmp3 = icmp sgt i32 %0, %max.red.08
399  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
400  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented index, truncated to i32, equals 1024.
401  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
402  %exitcond = icmp eq i32 %lftr.wideiv, 1024
403  br i1 %exitcond, label %for.end, label %for.body
404
405for.end:
406  ret i32 %max.red.0
407}
408
409; Float tests.
410
411; Maximum.
412
413; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
414; CHECK-LABEL: @max_red_float(
415; CHECK: fcmp fast ogt <2 x float>
416; CHECK: select <2 x i1>
417; CHECK: middle.block
418; CHECK: fcmp fast ogt <2 x float>
419; CHECK: select fast <2 x i1>
420
; Float max loop over the 1024 elements of @fA, carrying a running value that
; starts at the %max argument. The function uses attribute group #0.
421define float @max_red_float(float %max) #0 {
422entry:
423  br label %for.body
424
425for.body:
426  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
427  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
428  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
429  %0 = load float, float* %arrayidx, align 4
  ; Ordered greater-than: take the loaded element when it exceeds the carried
  ; value, otherwise keep the carried value.
430  %cmp3 = fcmp fast ogt float %0, %max.red.08
431  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
432  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
433  %exitcond = icmp eq i64 %indvars.iv.next, 1024
434  br i1 %exitcond, label %for.end, label %for.body
435
436for.end:
437  ret float %max.red.0
438}
439
440; CHECK-LABEL: @max_red_float_ge(
441; CHECK: fcmp fast oge <2 x float>
442; CHECK: select <2 x i1>
443; CHECK: middle.block
444; CHECK: fcmp fast ogt <2 x float>
445; CHECK: select fast <2 x i1>
446
; Float max loop over the 1024 elements of @fA using the non-strict ordered
; predicate `oge`. The function uses attribute group #0.
447define float @max_red_float_ge(float %max) #0 {
448entry:
449  br label %for.body
450
451for.body:
452  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
453  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
454  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
455  %0 = load float, float* %arrayidx, align 4
  ; loaded >= carried takes the loaded element, otherwise the carried value is
  ; kept.
456  %cmp3 = fcmp fast oge float %0, %max.red.08
457  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
458  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
459  %exitcond = icmp eq i64 %indvars.iv.next, 1024
460  br i1 %exitcond, label %for.end, label %for.body
461
462for.end:
463  ret float %max.red.0
464}
465
466; CHECK-LABEL: @inverted_max_red_float(
467; CHECK: fcmp fast olt <2 x float>
468; CHECK: select <2 x i1>
469; CHECK: middle.block
470; CHECK: fcmp fast ogt <2 x float>
471; CHECK: select fast <2 x i1>
472
; Float max loop over the 1024 elements of @fA written with the ordered `olt`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
473define float @inverted_max_red_float(float %max) #0 {
474entry:
475  br label %for.body
476
477for.body:
478  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
479  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
480  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
481  %0 = load float, float* %arrayidx, align 4
  ; loaded < carried keeps the carried value; otherwise the loaded element is
  ; taken.
482  %cmp3 = fcmp fast olt float %0, %max.red.08
483  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
484  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
485  %exitcond = icmp eq i64 %indvars.iv.next, 1024
486  br i1 %exitcond, label %for.end, label %for.body
487
488for.end:
489  ret float %max.red.0
490}
491
492; CHECK-LABEL: @inverted_max_red_float_le(
493; CHECK: fcmp fast ole <2 x float>
494; CHECK: select <2 x i1>
495; CHECK: middle.block
496; CHECK: fcmp fast ogt <2 x float>
497; CHECK: select fast <2 x i1>
498
; Float max loop over the 1024 elements of @fA written with the ordered `ole`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
499define float @inverted_max_red_float_le(float %max) #0 {
500entry:
501  br label %for.body
502
503for.body:
504  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
505  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
506  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
507  %0 = load float, float* %arrayidx, align 4
  ; loaded <= carried keeps the carried value; otherwise the loaded element is
  ; taken.
508  %cmp3 = fcmp fast ole float %0, %max.red.08
509  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
510  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
511  %exitcond = icmp eq i64 %indvars.iv.next, 1024
512  br i1 %exitcond, label %for.end, label %for.body
513
514for.end:
515  ret float %max.red.0
516}
517
518; CHECK-LABEL: @unordered_max_red_float(
519; CHECK: fcmp fast ugt <2 x float>
520; CHECK: select <2 x i1>
521; CHECK: middle.block
522; CHECK: fcmp fast ogt <2 x float>
523; CHECK: select fast <2 x i1>
524
; Float max loop over the 1024 elements of @fA using the unordered predicate
; `ugt`. The function uses attribute group #0.
525define float @unordered_max_red_float(float %max) #0 {
526entry:
527  br label %for.body
528
529for.body:
530  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
531  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
532  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
533  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-greater-than: when true the loaded element is taken, otherwise
  ; the carried value is kept.
534  %cmp3 = fcmp fast ugt float %0, %max.red.08
535  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
536  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
537  %exitcond = icmp eq i64 %indvars.iv.next, 1024
538  br i1 %exitcond, label %for.end, label %for.body
539
540for.end:
541  ret float %max.red.0
542}
543
544; CHECK-LABEL: @unordered_max_red_float_ge(
545; CHECK: fcmp fast uge <2 x float>
546; CHECK: select <2 x i1>
547; CHECK: middle.block
548; CHECK: fcmp fast ogt <2 x float>
549; CHECK: select fast <2 x i1>
550
; Float max loop over the 1024 elements of @fA using the non-strict unordered
; predicate `uge`. The function uses attribute group #0.
551define float @unordered_max_red_float_ge(float %max) #0 {
552entry:
553  br label %for.body
554
555for.body:
556  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
557  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
558  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
559  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-greater-or-equal: when true the loaded element is taken,
  ; otherwise the carried value is kept.
560  %cmp3 = fcmp fast uge float %0, %max.red.08
561  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
562  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
563  %exitcond = icmp eq i64 %indvars.iv.next, 1024
564  br i1 %exitcond, label %for.end, label %for.body
565
566for.end:
567  ret float %max.red.0
568}
569
570; CHECK-LABEL: @inverted_unordered_max_red_float(
571; CHECK: fcmp fast ult <2 x float>
572; CHECK: select <2 x i1>
573; CHECK: middle.block
574; CHECK: fcmp fast ogt <2 x float>
575; CHECK: select fast <2 x i1>
576
; Float max loop over the 1024 elements of @fA written with the unordered `ult`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
577define float @inverted_unordered_max_red_float(float %max) #0 {
578entry:
579  br label %for.body
580
581for.body:
582  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
583  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
584  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
585  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-less-than: when true the carried value is kept, otherwise the
  ; loaded element is taken.
586  %cmp3 = fcmp fast ult float %0, %max.red.08
587  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
588  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
589  %exitcond = icmp eq i64 %indvars.iv.next, 1024
590  br i1 %exitcond, label %for.end, label %for.body
591
592for.end:
593  ret float %max.red.0
594}
595
596; CHECK-LABEL: @inverted_unordered_max_red_float_le(
597; CHECK: fcmp fast ule <2 x float>
598; CHECK: select <2 x i1>
599; CHECK: middle.block
600; CHECK: fcmp fast ogt <2 x float>
601; CHECK: select fast <2 x i1>
602
; Float max loop over the 1024 elements of @fA written with the unordered `ule`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
603define float @inverted_unordered_max_red_float_le(float %max) #0 {
604entry:
605  br label %for.body
606
607for.body:
608  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
609  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
610  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
611  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-less-or-equal: when true the carried value is kept, otherwise
  ; the loaded element is taken.
612  %cmp3 = fcmp fast ule float %0, %max.red.08
613  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
614  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
615  %exitcond = icmp eq i64 %indvars.iv.next, 1024
616  br i1 %exitcond, label %for.end, label %for.body
617
618for.end:
619  ret float %max.red.0
620}
621
622; Minimum.
623
624; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
625; CHECK-LABEL: @min_red_float(
626; CHECK: fcmp fast olt <2 x float>
627; CHECK: select <2 x i1>
628; CHECK: middle.block
629; CHECK: fcmp fast olt <2 x float>
630; CHECK: select fast <2 x i1>
631
; Float min loop over the 1024 elements of @fA, carrying a running value that
; starts at the %min argument. The function uses attribute group #0.
632define float @min_red_float(float %min) #0 {
633entry:
634  br label %for.body
635
636for.body:
637  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
638  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
639  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
640  %0 = load float, float* %arrayidx, align 4
  ; Ordered less-than: take the loaded element when it is below the carried
  ; value, otherwise keep the carried value.
641  %cmp3 = fcmp fast olt float %0, %min.red.08
642  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
643  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
644  %exitcond = icmp eq i64 %indvars.iv.next, 1024
645  br i1 %exitcond, label %for.end, label %for.body
646
647for.end:
648  ret float %min.red.0
649}
650
651; CHECK-LABEL: @min_red_float_le(
652; CHECK: fcmp fast ole <2 x float>
653; CHECK: select <2 x i1>
654; CHECK: middle.block
655; CHECK: fcmp fast olt <2 x float>
656; CHECK: select fast <2 x i1>
657
; Float min loop over the 1024 elements of @fA using the non-strict ordered
; predicate `ole`. The function uses attribute group #0.
658define float @min_red_float_le(float %min) #0 {
659entry:
660  br label %for.body
661
662for.body:
663  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
664  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
665  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
666  %0 = load float, float* %arrayidx, align 4
  ; loaded <= carried takes the loaded element, otherwise the carried value is
  ; kept.
667  %cmp3 = fcmp fast ole float %0, %min.red.08
668  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
669  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
670  %exitcond = icmp eq i64 %indvars.iv.next, 1024
671  br i1 %exitcond, label %for.end, label %for.body
672
673for.end:
674  ret float %min.red.0
675}
676
677; CHECK-LABEL: @inverted_min_red_float(
678; CHECK: fcmp fast ogt <2 x float>
679; CHECK: select <2 x i1>
680; CHECK: middle.block
681; CHECK: fcmp fast olt <2 x float>
682; CHECK: select fast <2 x i1>
683
; Float min loop over the 1024 elements of @fA written with the ordered `ogt`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
684define float @inverted_min_red_float(float %min) #0 {
685entry:
686  br label %for.body
687
688for.body:
689  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
690  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
691  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
692  %0 = load float, float* %arrayidx, align 4
  ; loaded > carried keeps the carried value; otherwise the loaded element is
  ; taken.
693  %cmp3 = fcmp fast ogt float %0, %min.red.08
694  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
695  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
696  %exitcond = icmp eq i64 %indvars.iv.next, 1024
697  br i1 %exitcond, label %for.end, label %for.body
698
699for.end:
700  ret float %min.red.0
701}
702
703; CHECK-LABEL: @inverted_min_red_float_ge(
704; CHECK: fcmp fast oge <2 x float>
705; CHECK: select <2 x i1>
706; CHECK: middle.block
707; CHECK: fcmp fast olt <2 x float>
708; CHECK: select fast <2 x i1>
709
; Float min loop over the 1024 elements of @fA written with the ordered `oge`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
710define float @inverted_min_red_float_ge(float %min) #0 {
711entry:
712  br label %for.body
713
714for.body:
715  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
716  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
717  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
718  %0 = load float, float* %arrayidx, align 4
  ; loaded >= carried keeps the carried value; otherwise the loaded element is
  ; taken.
719  %cmp3 = fcmp fast oge float %0, %min.red.08
720  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
721  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
722  %exitcond = icmp eq i64 %indvars.iv.next, 1024
723  br i1 %exitcond, label %for.end, label %for.body
724
725for.end:
726  ret float %min.red.0
727}
728
729; CHECK-LABEL: @unordered_min_red_float(
730; CHECK: fcmp fast ult <2 x float>
731; CHECK: select <2 x i1>
732; CHECK: middle.block
733; CHECK: fcmp fast olt <2 x float>
734; CHECK: select fast <2 x i1>
735
; Float min loop over the 1024 elements of @fA using the unordered predicate
; `ult`. The function uses attribute group #0.
736define float @unordered_min_red_float(float %min) #0 {
737entry:
738  br label %for.body
739
740for.body:
741  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
742  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
743  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
744  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-less-than: when true the loaded element is taken, otherwise
  ; the carried value is kept.
745  %cmp3 = fcmp fast ult float %0, %min.red.08
746  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
747  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
748  %exitcond = icmp eq i64 %indvars.iv.next, 1024
749  br i1 %exitcond, label %for.end, label %for.body
750
751for.end:
752  ret float %min.red.0
753}
754
755; CHECK-LABEL: @unordered_min_red_float_le(
756; CHECK: fcmp fast ule <2 x float>
757; CHECK: select <2 x i1>
758; CHECK: middle.block
759; CHECK: fcmp fast olt <2 x float>
760; CHECK: select fast <2 x i1>
761
; Float min loop over the 1024 elements of @fA using the non-strict unordered
; predicate `ule`. The function uses attribute group #0.
762define float @unordered_min_red_float_le(float %min) #0 {
763entry:
764  br label %for.body
765
766for.body:
767  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
768  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
769  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
770  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-less-or-equal: when true the loaded element is taken,
  ; otherwise the carried value is kept.
771  %cmp3 = fcmp fast ule float %0, %min.red.08
772  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
773  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
774  %exitcond = icmp eq i64 %indvars.iv.next, 1024
775  br i1 %exitcond, label %for.end, label %for.body
776
777for.end:
778  ret float %min.red.0
779}
780
781; CHECK-LABEL: @inverted_unordered_min_red_float(
782; CHECK: fcmp fast ugt <2 x float>
783; CHECK: select <2 x i1>
784; CHECK: middle.block
785; CHECK: fcmp fast olt <2 x float>
786; CHECK: select fast <2 x i1>
787
; Float min loop over the 1024 elements of @fA written with the unordered `ugt`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
788define float @inverted_unordered_min_red_float(float %min) #0 {
789entry:
790  br label %for.body
791
792for.body:
793  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
794  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
795  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
796  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-greater-than: when true the carried value is kept, otherwise
  ; the loaded element is taken.
797  %cmp3 = fcmp fast ugt float %0, %min.red.08
798  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
799  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
800  %exitcond = icmp eq i64 %indvars.iv.next, 1024
801  br i1 %exitcond, label %for.end, label %for.body
802
803for.end:
804  ret float %min.red.0
805}
806
807; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
808; CHECK: fcmp fast uge <2 x float>
809; CHECK: select <2 x i1>
810; CHECK: middle.block
811; CHECK: fcmp fast olt <2 x float>
812; CHECK: select fast <2 x i1>
813
; Float min loop over the 1024 elements of @fA written with the unordered `uge`
; predicate and the select arms swapped (carried value first). The function
; uses attribute group #0.
814define float @inverted_unordered_min_red_float_ge(float %min) #0 {
815entry:
816  br label %for.body
817
818for.body:
819  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
820  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
821  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
822  %0 = load float, float* %arrayidx, align 4
  ; Unordered-or-greater-or-equal: when true the carried value is kept,
  ; otherwise the loaded element is taken.
823  %cmp3 = fcmp fast uge float %0, %min.red.08
824  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
825  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
826  %exitcond = icmp eq i64 %indvars.iv.next, 1024
827  br i1 %exitcond, label %for.end, label %for.body
828
829for.end:
830  ret float %min.red.0
831}
832
833; Make sure we handle doubles, too.
834; CHECK-LABEL: @min_red_double(
835; CHECK: fcmp fast olt <2 x double>
836; CHECK: select <2 x i1>
837; CHECK: middle.block
838; CHECK: fcmp fast olt <2 x double>
839; CHECK: select fast <2 x i1>
840
; Double-precision min loop over the 1024 elements of @dA, carrying a running
; value that starts at the %min argument. The function uses attribute group #0.
841define double @min_red_double(double %min) #0 {
842entry:
843  br label %for.body
844
845for.body:
846  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
847  %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
848  %arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
  ; The load is declared with 4-byte alignment.
849  %0 = load double, double* %arrayidx, align 4
  ; Ordered less-than: take the loaded element when it is below the carried
  ; value, otherwise keep the carried value.
850  %cmp3 = fcmp fast olt double %0, %min.red.08
851  %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
852  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
853  %exitcond = icmp eq i64 %indvars.iv.next, 1024
854  br i1 %exitcond, label %for.end, label %for.body
855
856for.end:
857  ret double %min.red.0
858}
859
860
861; Don't turn this into a max reduction: the no-nans-fp-math attribute is missing.
862; CHECK-LABEL: @max_red_float_nans(
863; CHECK-NOT: <2 x float>
864
; Negative test: same compare/select shape as the float max loop over @fA, but
; the function does not reference attribute group #0 (the compare itself still
; carries the `fast` flag).
865define float @max_red_float_nans(float %max) {
866entry:
867  br label %for.body
868
869for.body:
870  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
871  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
872  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
873  %0 = load float, float* %arrayidx, align 4
  ; Ordered greater-than: take the loaded element when it exceeds the carried
  ; value, otherwise keep the carried value.
874  %cmp3 = fcmp fast ogt float %0, %max.red.08
875  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
876  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the incremented i64 index equals 1024.
877  %exitcond = icmp eq i64 %indvars.iv.next, 1024
878  br i1 %exitcond, label %for.end, label %for.body
879
880for.end:
881  ret float %max.red.0
882}
883
884
885attributes #0 = { "no-nans-fp-math"="true" }
886