1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
3; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
4; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
5; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
6
; Zero-initialized globals used as inputs by the reduction tests.
; @arr  : 32 x i32, 16-byte aligned; read by the @maxi* functions below.
; @arr1 : 32 x float, 16-byte aligned; read by the @maxf* functions below.
; @arrp and @var are not referenced by the functions visible in this part of
; the file (presumably used by later tests -- confirm before removing).
7@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
8@arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16
9@arrp = local_unnamed_addr global [32 x i32*] zeroinitializer, align 16
10@var = global i32 zeroinitializer, align 8
11
; maxi8: signed-integer max over @arr[0..7], written as a serial chain of
; `icmp sgt` + `select` pairs; each step compares the running maximum with the
; next loaded element and keeps the larger one. The unnamed i32 argument is
; never used. The CHECK prefix (enabled on every RUN line) expects the whole
; chain to become one <8 x i32> load feeding @llvm.vector.reduce.smax.v8i32.
12define i32 @maxi8(i32) {
13; CHECK-LABEL: @maxi8(
14; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
15; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
16; CHECK-NEXT:    ret i32 [[TMP3]]
17;
; Input IR: the first compare seeds the running max from elements 0 and 1;
; every later step folds in exactly one more element.
18  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
19  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
20  %4 = icmp sgt i32 %2, %3
21  %5 = select i1 %4, i32 %2, i32 %3
22  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
23  %7 = icmp sgt i32 %5, %6
24  %8 = select i1 %7, i32 %5, i32 %6
25  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
26  %10 = icmp sgt i32 %8, %9
27  %11 = select i1 %10, i32 %8, i32 %9
28  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
29  %13 = icmp sgt i32 %11, %12
30  %14 = select i1 %13, i32 %11, i32 %12
31  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
32  %16 = icmp sgt i32 %14, %15
33  %17 = select i1 %16, i32 %14, i32 %15
34  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
35  %19 = icmp sgt i32 %17, %18
36  %20 = select i1 %19, i32 %17, i32 %18
37  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
38  %22 = icmp sgt i32 %20, %21
39  %23 = select i1 %22, i32 %20, i32 %21
40  ret i32 %23
41}
42
; maxi16: same serial `icmp sgt` + `select` max chain as @maxi8, but over
; @arr[0..15]. The unnamed i32 argument is never used. The CHECK prefix
; (enabled on every RUN line) expects a single <16 x i32> load feeding
; @llvm.vector.reduce.smax.v16i32.
43define i32 @maxi16(i32) {
44; CHECK-LABEL: @maxi16(
45; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
46; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
47; CHECK-NEXT:    ret i32 [[TMP3]]
48;
; Input IR: elements 0 and 1 seed the running max; each following
; load/icmp/select triple folds in the next consecutive element.
49  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
50  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
51  %4 = icmp sgt i32 %2, %3
52  %5 = select i1 %4, i32 %2, i32 %3
53  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
54  %7 = icmp sgt i32 %5, %6
55  %8 = select i1 %7, i32 %5, i32 %6
56  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
57  %10 = icmp sgt i32 %8, %9
58  %11 = select i1 %10, i32 %8, i32 %9
59  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
60  %13 = icmp sgt i32 %11, %12
61  %14 = select i1 %13, i32 %11, i32 %12
62  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
63  %16 = icmp sgt i32 %14, %15
64  %17 = select i1 %16, i32 %14, i32 %15
65  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
66  %19 = icmp sgt i32 %17, %18
67  %20 = select i1 %19, i32 %17, i32 %18
68  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
69  %22 = icmp sgt i32 %20, %21
70  %23 = select i1 %22, i32 %20, i32 %21
71  %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
72  %25 = icmp sgt i32 %23, %24
73  %26 = select i1 %25, i32 %23, i32 %24
74  %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
75  %28 = icmp sgt i32 %26, %27
76  %29 = select i1 %28, i32 %26, i32 %27
77  %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
78  %31 = icmp sgt i32 %29, %30
79  %32 = select i1 %31, i32 %29, i32 %30
80  %33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
81  %34 = icmp sgt i32 %32, %33
82  %35 = select i1 %34, i32 %32, i32 %33
83  %36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
84  %37 = icmp sgt i32 %35, %36
85  %38 = select i1 %37, i32 %35, i32 %36
86  %39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
87  %40 = icmp sgt i32 %38, %39
88  %41 = select i1 %40, i32 %38, i32 %39
89  %42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
90  %43 = icmp sgt i32 %41, %42
91  %44 = select i1 %43, i32 %41, i32 %42
92  %45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
93  %46 = icmp sgt i32 %44, %45
94  %47 = select i1 %46, i32 %44, i32 %45
95  ret i32 %47
96}
97
; maxi32: same serial `icmp sgt` + `select` max chain as @maxi8/@maxi16, but
; over all 32 elements of @arr. The unnamed i32 argument is never used. The
; CHECK prefix (enabled on every RUN line) expects a single <32 x i32> load
; feeding @llvm.vector.reduce.smax.v32i32.
98define i32 @maxi32(i32) {
99; CHECK-LABEL: @maxi32(
100; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
101; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
102; CHECK-NEXT:    ret i32 [[TMP3]]
103;
; Input IR: elements 0 and 1 seed the running max; each following
; load/icmp/select triple folds in the next consecutive element up to 31.
104  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
105  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
106  %4 = icmp sgt i32 %2, %3
107  %5 = select i1 %4, i32 %2, i32 %3
108  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
109  %7 = icmp sgt i32 %5, %6
110  %8 = select i1 %7, i32 %5, i32 %6
111  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
112  %10 = icmp sgt i32 %8, %9
113  %11 = select i1 %10, i32 %8, i32 %9
114  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
115  %13 = icmp sgt i32 %11, %12
116  %14 = select i1 %13, i32 %11, i32 %12
117  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
118  %16 = icmp sgt i32 %14, %15
119  %17 = select i1 %16, i32 %14, i32 %15
120  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
121  %19 = icmp sgt i32 %17, %18
122  %20 = select i1 %19, i32 %17, i32 %18
123  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
124  %22 = icmp sgt i32 %20, %21
125  %23 = select i1 %22, i32 %20, i32 %21
126  %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
127  %25 = icmp sgt i32 %23, %24
128  %26 = select i1 %25, i32 %23, i32 %24
129  %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
130  %28 = icmp sgt i32 %26, %27
131  %29 = select i1 %28, i32 %26, i32 %27
132  %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
133  %31 = icmp sgt i32 %29, %30
134  %32 = select i1 %31, i32 %29, i32 %30
135  %33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
136  %34 = icmp sgt i32 %32, %33
137  %35 = select i1 %34, i32 %32, i32 %33
138  %36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
139  %37 = icmp sgt i32 %35, %36
140  %38 = select i1 %37, i32 %35, i32 %36
141  %39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
142  %40 = icmp sgt i32 %38, %39
143  %41 = select i1 %40, i32 %38, i32 %39
144  %42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
145  %43 = icmp sgt i32 %41, %42
146  %44 = select i1 %43, i32 %41, i32 %42
147  %45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
148  %46 = icmp sgt i32 %44, %45
149  %47 = select i1 %46, i32 %44, i32 %45
150  %48 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 16), align 16
151  %49 = icmp sgt i32 %47, %48
152  %50 = select i1 %49, i32 %47, i32 %48
153  %51 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 17), align 4
154  %52 = icmp sgt i32 %50, %51
155  %53 = select i1 %52, i32 %50, i32 %51
156  %54 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 18), align 8
157  %55 = icmp sgt i32 %53, %54
158  %56 = select i1 %55, i32 %53, i32 %54
159  %57 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 19), align 4
160  %58 = icmp sgt i32 %56, %57
161  %59 = select i1 %58, i32 %56, i32 %57
162  %60 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 20), align 16
163  %61 = icmp sgt i32 %59, %60
164  %62 = select i1 %61, i32 %59, i32 %60
165  %63 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 21), align 4
166  %64 = icmp sgt i32 %62, %63
167  %65 = select i1 %64, i32 %62, i32 %63
168  %66 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 22), align 8
169  %67 = icmp sgt i32 %65, %66
170  %68 = select i1 %67, i32 %65, i32 %66
171  %69 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 23), align 4
172  %70 = icmp sgt i32 %68, %69
173  %71 = select i1 %70, i32 %68, i32 %69
174  %72 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 24), align 16
175  %73 = icmp sgt i32 %71, %72
176  %74 = select i1 %73, i32 %71, i32 %72
177  %75 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 25), align 4
178  %76 = icmp sgt i32 %74, %75
179  %77 = select i1 %76, i32 %74, i32 %75
180  %78 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 26), align 8
181  %79 = icmp sgt i32 %77, %78
182  %80 = select i1 %79, i32 %77, i32 %78
183  %81 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 27), align 4
184  %82 = icmp sgt i32 %80, %81
185  %83 = select i1 %82, i32 %80, i32 %81
186  %84 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 28), align 16
187  %85 = icmp sgt i32 %83, %84
188  %86 = select i1 %85, i32 %83, i32 %84
189  %87 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 29), align 4
190  %88 = icmp sgt i32 %86, %87
191  %89 = select i1 %88, i32 %86, i32 %87
192  %90 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 30), align 8
193  %91 = icmp sgt i32 %89, %90
194  %92 = select i1 %91, i32 %89, i32 %90
195  %93 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 31), align 4
196  %94 = icmp sgt i32 %92, %93
197  %95 = select i1 %94, i32 %92, i32 %93
198  ret i32 %95
199}
200
201; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
202; with fastmath on the select.
; maxf8: float max over @arr1[0..7], written as a serial chain of
; `fcmp fast ogt` + `select` pairs. The fast-math flag is on the fcmp only;
; the selects carry no flags. The unnamed float argument is never used.
; Expected output per prefix:
;   DEFAULT (the three RUN lines without -slp-threshold): the scalar chain is
;     left exactly as written, i.e. no reduction is formed.
;   THRESH (-mcpu=skx -slp-threshold=-100): only the first two loads are merged
;     into one <2 x float> load plus two extractelements; the rest of the
;     compare/select chain stays scalar.
203define float @maxf8(float) {
204; DEFAULT-LABEL: @maxf8(
205; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
206; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
207; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
208; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
209; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
210; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
211; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
212; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
213; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
214; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
215; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
216; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
217; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
218; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
219; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
220; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
221; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
222; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
223; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
224; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
225; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
226; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
227; DEFAULT-NEXT:    ret float [[TMP23]]
228;
229; THRESH-LABEL: @maxf8(
230; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
231; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
232; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
233; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
234; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
235; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
236; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
237; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
238; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
239; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
240; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
241; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
242; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
243; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
244; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
245; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
246; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
247; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
248; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
249; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
250; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
251; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
252; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
253; THRESH-NEXT:    ret float [[TMP24]]
254;
; Input IR: elements 0 and 1 seed the running max; each following
; load/fcmp/select triple folds in the next consecutive element.
255  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
256  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
257  %4 = fcmp fast ogt float %2, %3
258  %5 = select i1 %4, float %2, float %3
259  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
260  %7 = fcmp fast ogt float %5, %6
261  %8 = select i1 %7, float %5, float %6
262  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
263  %10 = fcmp fast ogt float %8, %9
264  %11 = select i1 %10, float %8, float %9
265  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
266  %13 = fcmp fast ogt float %11, %12
267  %14 = select i1 %13, float %11, float %12
268  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
269  %16 = fcmp fast ogt float %14, %15
270  %17 = select i1 %16, float %14, float %15
271  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
272  %19 = fcmp fast ogt float %17, %18
273  %20 = select i1 %19, float %17, float %18
274  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
275  %22 = fcmp fast ogt float %20, %21
276  %23 = select i1 %22, float %20, float %21
277  ret float %23
278}
279
280; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
281; with fastmath on the select.
; maxf16: float max over @arr1[0..15], written as a serial chain of
; `fcmp fast ogt` + `select` pairs (fast-math flag on the fcmp only; the
; selects carry no flags). The unnamed float argument is never used.
; Expected output per prefix:
;   DEFAULT (the three RUN lines without -slp-threshold): the scalar chain is
;     left exactly as written, i.e. no reduction is formed.
;   THRESH (-mcpu=skx -slp-threshold=-100): only the first two loads are merged
;     into one <2 x float> load plus two extractelements; the rest of the
;     compare/select chain stays scalar.
282define float @maxf16(float) {
283; DEFAULT-LABEL: @maxf16(
284; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
285; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
286; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
287; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
288; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
289; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
290; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
291; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
292; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
293; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
294; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
295; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
296; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
297; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
298; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
299; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
300; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
301; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
302; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
303; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
304; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
305; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
306; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
307; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
308; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
309; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
310; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
311; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
312; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
313; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
314; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
315; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
316; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
317; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
318; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
319; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
320; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
321; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
322; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
323; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
324; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
325; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
326; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
327; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
328; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
329; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
330; DEFAULT-NEXT:    ret float [[TMP47]]
331;
332; THRESH-LABEL: @maxf16(
333; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
334; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
335; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
336; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
337; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
338; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
339; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
340; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
341; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
342; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
343; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
344; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
345; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
346; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
347; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
348; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
349; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
350; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
351; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
352; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
353; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
354; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
355; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
356; THRESH-NEXT:    [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
357; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
358; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
359; THRESH-NEXT:    [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
360; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
361; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
362; THRESH-NEXT:    [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
363; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
364; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
365; THRESH-NEXT:    [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
366; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
367; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
368; THRESH-NEXT:    [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
369; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
370; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
371; THRESH-NEXT:    [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
372; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
373; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
374; THRESH-NEXT:    [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
375; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
376; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
377; THRESH-NEXT:    [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
378; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
379; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
380; THRESH-NEXT:    ret float [[TMP48]]
381;
; Input IR: elements 0 and 1 seed the running max; each following
; load/fcmp/select triple folds in the next consecutive element.
382  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
383  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
384  %4 = fcmp fast ogt float %2, %3
385  %5 = select i1 %4, float %2, float %3
386  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
387  %7 = fcmp fast ogt float %5, %6
388  %8 = select i1 %7, float %5, float %6
389  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
390  %10 = fcmp fast ogt float %8, %9
391  %11 = select i1 %10, float %8, float %9
392  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
393  %13 = fcmp fast ogt float %11, %12
394  %14 = select i1 %13, float %11, float %12
395  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
396  %16 = fcmp fast ogt float %14, %15
397  %17 = select i1 %16, float %14, float %15
398  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
399  %19 = fcmp fast ogt float %17, %18
400  %20 = select i1 %19, float %17, float %18
401  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
402  %22 = fcmp fast ogt float %20, %21
403  %23 = select i1 %22, float %20, float %21
404  %24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
405  %25 = fcmp fast ogt float %23, %24
406  %26 = select i1 %25, float %23, float %24
407  %27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
408  %28 = fcmp fast ogt float %26, %27
409  %29 = select i1 %28, float %26, float %27
410  %30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
411  %31 = fcmp fast ogt float %29, %30
412  %32 = select i1 %31, float %29, float %30
413  %33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
414  %34 = fcmp fast ogt float %32, %33
415  %35 = select i1 %34, float %32, float %33
416  %36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
417  %37 = fcmp fast ogt float %35, %36
418  %38 = select i1 %37, float %35, float %36
419  %39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
420  %40 = fcmp fast ogt float %38, %39
421  %41 = select i1 %40, float %38, float %39
422  %42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
423  %43 = fcmp fast ogt float %41, %42
424  %44 = select i1 %43, float %41, float %42
425  %45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
426  %46 = fcmp fast ogt float %44, %45
427  %47 = select i1 %46, float %44, float %45
428  ret float %47
429}
430
; maxf32: max-reduction over all 32 floats of @arr1, written as a serial chain
; of `fcmp fast ogt` + `select` pairs. The unnamed float argument is unused.
; Only the fcmp instructions carry fast-math flags; the selects carry none.
; Per the assertions below, the chain is expected to stay scalar for the runs
; using the DEFAULT prefix, while the run with -slp-threshold=-100 (THRESH
; prefix) only merges the first two loads into a single <2 x float> load.
431; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
432; with fastmath on the select.
433define float @maxf32(float) {
434; DEFAULT-LABEL: @maxf32(
435; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
436; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
437; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
438; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
439; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
440; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
441; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
442; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
443; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
444; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
445; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
446; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
447; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
448; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
449; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
450; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
451; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
452; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
453; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
454; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
455; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
456; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
457; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
458; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
459; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
460; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
461; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
462; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
463; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
464; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
465; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
466; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
467; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
468; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
469; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
470; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
471; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
472; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
473; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
474; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
475; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
476; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
477; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
478; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
479; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
480; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
481; DEFAULT-NEXT:    [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
482; DEFAULT-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]]
483; DEFAULT-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]]
484; DEFAULT-NEXT:    [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
485; DEFAULT-NEXT:    [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]]
486; DEFAULT-NEXT:    [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]]
487; DEFAULT-NEXT:    [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
488; DEFAULT-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]]
489; DEFAULT-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]]
490; DEFAULT-NEXT:    [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
491; DEFAULT-NEXT:    [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]]
492; DEFAULT-NEXT:    [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]]
493; DEFAULT-NEXT:    [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
494; DEFAULT-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]]
495; DEFAULT-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]]
496; DEFAULT-NEXT:    [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
497; DEFAULT-NEXT:    [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]]
498; DEFAULT-NEXT:    [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]]
499; DEFAULT-NEXT:    [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
500; DEFAULT-NEXT:    [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]]
501; DEFAULT-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]]
502; DEFAULT-NEXT:    [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
503; DEFAULT-NEXT:    [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]]
504; DEFAULT-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]]
505; DEFAULT-NEXT:    [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
506; DEFAULT-NEXT:    [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]]
507; DEFAULT-NEXT:    [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]]
508; DEFAULT-NEXT:    [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
509; DEFAULT-NEXT:    [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]]
510; DEFAULT-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]]
511; DEFAULT-NEXT:    [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
512; DEFAULT-NEXT:    [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]]
513; DEFAULT-NEXT:    [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]]
514; DEFAULT-NEXT:    [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
515; DEFAULT-NEXT:    [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]]
516; DEFAULT-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]]
517; DEFAULT-NEXT:    [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
518; DEFAULT-NEXT:    [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]]
519; DEFAULT-NEXT:    [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]]
520; DEFAULT-NEXT:    [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
521; DEFAULT-NEXT:    [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]]
522; DEFAULT-NEXT:    [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]]
523; DEFAULT-NEXT:    [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
524; DEFAULT-NEXT:    [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]]
525; DEFAULT-NEXT:    [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]]
526; DEFAULT-NEXT:    [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
527; DEFAULT-NEXT:    [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]]
528; DEFAULT-NEXT:    [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]]
529; DEFAULT-NEXT:    ret float [[TMP95]]
530;
531; THRESH-LABEL: @maxf32(
532; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
533; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
534; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
535; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
536; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
537; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
538; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
539; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
540; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
541; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
542; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
543; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
544; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
545; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
546; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
547; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
548; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
549; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
550; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
551; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
552; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
553; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
554; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
555; THRESH-NEXT:    [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
556; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
557; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
558; THRESH-NEXT:    [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
559; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
560; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
561; THRESH-NEXT:    [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
562; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
563; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
564; THRESH-NEXT:    [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
565; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
566; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
567; THRESH-NEXT:    [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
568; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
569; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
570; THRESH-NEXT:    [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
571; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
572; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
573; THRESH-NEXT:    [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
574; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
575; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
576; THRESH-NEXT:    [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
577; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
578; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
579; THRESH-NEXT:    [[TMP49:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
580; THRESH-NEXT:    [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]]
581; THRESH-NEXT:    [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]]
582; THRESH-NEXT:    [[TMP52:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
583; THRESH-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]]
584; THRESH-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]]
585; THRESH-NEXT:    [[TMP55:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
586; THRESH-NEXT:    [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]]
587; THRESH-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]]
588; THRESH-NEXT:    [[TMP58:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
589; THRESH-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]]
590; THRESH-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]]
591; THRESH-NEXT:    [[TMP61:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
592; THRESH-NEXT:    [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]]
593; THRESH-NEXT:    [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]]
594; THRESH-NEXT:    [[TMP64:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
595; THRESH-NEXT:    [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]]
596; THRESH-NEXT:    [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]]
597; THRESH-NEXT:    [[TMP67:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
598; THRESH-NEXT:    [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]]
599; THRESH-NEXT:    [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]]
600; THRESH-NEXT:    [[TMP70:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
601; THRESH-NEXT:    [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]]
602; THRESH-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]]
603; THRESH-NEXT:    [[TMP73:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
604; THRESH-NEXT:    [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]]
605; THRESH-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]]
606; THRESH-NEXT:    [[TMP76:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
607; THRESH-NEXT:    [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]]
608; THRESH-NEXT:    [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]]
609; THRESH-NEXT:    [[TMP79:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
610; THRESH-NEXT:    [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]]
611; THRESH-NEXT:    [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]]
612; THRESH-NEXT:    [[TMP82:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
613; THRESH-NEXT:    [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]]
614; THRESH-NEXT:    [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]]
615; THRESH-NEXT:    [[TMP85:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
616; THRESH-NEXT:    [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]]
617; THRESH-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]]
618; THRESH-NEXT:    [[TMP88:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
619; THRESH-NEXT:    [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]]
620; THRESH-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]]
621; THRESH-NEXT:    [[TMP91:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
622; THRESH-NEXT:    [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]]
623; THRESH-NEXT:    [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]]
624; THRESH-NEXT:    [[TMP94:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
625; THRESH-NEXT:    [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]]
626; THRESH-NEXT:    [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]]
627; THRESH-NEXT:    ret float [[TMP96]]
628;
  ; Input IR: each step loads the next element of @arr1 and keeps the larger
  ; of it and the running maximum (compare, then select).
629  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
630  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
631  %4 = fcmp fast ogt float %2, %3
632  %5 = select i1 %4, float %2, float %3
633  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
634  %7 = fcmp fast ogt float %5, %6
635  %8 = select i1 %7, float %5, float %6
636  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
637  %10 = fcmp fast ogt float %8, %9
638  %11 = select i1 %10, float %8, float %9
639  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
640  %13 = fcmp fast ogt float %11, %12
641  %14 = select i1 %13, float %11, float %12
642  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
643  %16 = fcmp fast ogt float %14, %15
644  %17 = select i1 %16, float %14, float %15
645  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
646  %19 = fcmp fast ogt float %17, %18
647  %20 = select i1 %19, float %17, float %18
648  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
649  %22 = fcmp fast ogt float %20, %21
650  %23 = select i1 %22, float %20, float %21
651  %24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
652  %25 = fcmp fast ogt float %23, %24
653  %26 = select i1 %25, float %23, float %24
654  %27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
655  %28 = fcmp fast ogt float %26, %27
656  %29 = select i1 %28, float %26, float %27
657  %30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
658  %31 = fcmp fast ogt float %29, %30
659  %32 = select i1 %31, float %29, float %30
660  %33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
661  %34 = fcmp fast ogt float %32, %33
662  %35 = select i1 %34, float %32, float %33
663  %36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
664  %37 = fcmp fast ogt float %35, %36
665  %38 = select i1 %37, float %35, float %36
666  %39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
667  %40 = fcmp fast ogt float %38, %39
668  %41 = select i1 %40, float %38, float %39
669  %42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
670  %43 = fcmp fast ogt float %41, %42
671  %44 = select i1 %43, float %41, float %42
672  %45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
673  %46 = fcmp fast ogt float %44, %45
674  %47 = select i1 %46, float %44, float %45
675  %48 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
676  %49 = fcmp fast ogt float %47, %48
677  %50 = select i1 %49, float %47, float %48
678  %51 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
679  %52 = fcmp fast ogt float %50, %51
680  %53 = select i1 %52, float %50, float %51
681  %54 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
682  %55 = fcmp fast ogt float %53, %54
683  %56 = select i1 %55, float %53, float %54
684  %57 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
685  %58 = fcmp fast ogt float %56, %57
686  %59 = select i1 %58, float %56, float %57
687  %60 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
688  %61 = fcmp fast ogt float %59, %60
689  %62 = select i1 %61, float %59, float %60
690  %63 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
691  %64 = fcmp fast ogt float %62, %63
692  %65 = select i1 %64, float %62, float %63
693  %66 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
694  %67 = fcmp fast ogt float %65, %66
695  %68 = select i1 %67, float %65, float %66
696  %69 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
697  %70 = fcmp fast ogt float %68, %69
698  %71 = select i1 %70, float %68, float %69
699  %72 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
700  %73 = fcmp fast ogt float %71, %72
701  %74 = select i1 %73, float %71, float %72
702  %75 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
703  %76 = fcmp fast ogt float %74, %75
704  %77 = select i1 %76, float %74, float %75
705  %78 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
706  %79 = fcmp fast ogt float %77, %78
707  %80 = select i1 %79, float %77, float %78
708  %81 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
709  %82 = fcmp fast ogt float %80, %81
710  %83 = select i1 %82, float %80, float %81
711  %84 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
712  %85 = fcmp fast ogt float %83, %84
713  %86 = select i1 %85, float %83, float %84
714  %87 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
715  %88 = fcmp fast ogt float %86, %87
716  %89 = select i1 %88, float %86, float %87
717  %90 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
718  %91 = fcmp fast ogt float %89, %90
719  %92 = select i1 %91, float %89, float %90
720  %93 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
721  %94 = fcmp fast ogt float %92, %93
722  %95 = select i1 %94, float %92, float %93
723  ret float %95
724}
725
; maxi8_mutiple_uses: signed-max reduction over @arr[0..7], written as a serial
; chain of `icmp sgt` + `select` pairs, in which the first compare has a second
; user outside the chain: it also picks the constant (3 or 4) stored to @var.
; The unnamed i32 argument is unused.
; Per the assertions below, the SSE prefix expects the whole chain to stay
; scalar; the AVX prefix expects elements 2..5 to be loaded as <4 x i32> and
; reduced with llvm.vector.reduce.smax while the first compare stays scalar;
; the THRESH prefix additionally expects a <2 x i32> compare whose lane 1 is
; extracted to feed the extra select.
726define i32 @maxi8_mutiple_uses(i32) {
727; SSE-LABEL: @maxi8_mutiple_uses(
728; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
729; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
730; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
731; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
732; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
733; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
734; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
735; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
736; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
737; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
738; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
739; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
740; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
741; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
742; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
743; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
744; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
745; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
746; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
747; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
748; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
749; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
750; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
751; SSE-NEXT:    store i32 [[TMP24]], i32* @var, align 8
752; SSE-NEXT:    ret i32 [[TMP23]]
753;
754; AVX-LABEL: @maxi8_mutiple_uses(
755; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
756; AVX-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
757; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
758; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
759; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
760; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
761; AVX-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
762; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
763; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
764; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
765; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
766; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
767; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
768; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
769; AVX-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
770; AVX-NEXT:    store i32 [[TMP15]], i32* @var, align 8
771; AVX-NEXT:    ret i32 [[TMP14]]
772;
773; THRESH-LABEL: @maxi8_mutiple_uses(
774; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
775; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
776; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
777; THRESH-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
778; THRESH-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
779; THRESH-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
780; THRESH-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[TMP7]], i32 0
781; THRESH-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP3]], i32 1
782; THRESH-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
783; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1
784; THRESH-NEXT:    [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
785; THRESH-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
786; THRESH-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
787; THRESH-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
788; THRESH-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
789; THRESH-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]]
790; THRESH-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
791; THRESH-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
792; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]]
793; THRESH-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
794; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
795; THRESH-NEXT:    store i32 [[TMP21]], i32* @var, align 8
796; THRESH-NEXT:    ret i32 [[TMP19]]
797;
  ; Input IR: each step loads the next element of @arr and keeps the larger of
  ; it and the running maximum (compare, then select).
798  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
799  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
800  %4 = icmp sgt i32 %2, %3
801  %5 = select i1 %4, i32 %2, i32 %3
802  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
803  %7 = icmp sgt i32 %5, %6
804  %8 = select i1 %7, i32 %5, i32 %6
805  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
806  %10 = icmp sgt i32 %8, %9
807  %11 = select i1 %10, i32 %8, i32 %9
808  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
809  %13 = icmp sgt i32 %11, %12
810  %14 = select i1 %13, i32 %11, i32 %12
811  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
812  %16 = icmp sgt i32 %14, %15
813  %17 = select i1 %16, i32 %14, i32 %15
814  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
815  %19 = icmp sgt i32 %17, %18
816  %20 = select i1 %19, i32 %17, i32 %18
817  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
818  %22 = icmp sgt i32 %20, %21
819  %23 = select i1 %22, i32 %20, i32 %21
  ; Second user of the first compare (%4), outside the max chain: its result
  ; is stored to @var, so %4 must remain available after vectorization.
820  %24 = select i1 %4, i32 3, i32 4
821  store i32 %24, i32* @var, align 8
822  ret i32 %23
823}
824
; Signed-max (icmp sgt + select) chain over @arr[0..7] where the first compare
; lives in the entry block but the select that consumes it lives in block %pp.
; The unnamed i32 argument is not used by the body.
; Expected output per check prefix (see the RUN lines at the top of the file):
;   SSE    - the scalar compare/select chain is left as-is.
;   AVX    - @arr[2..5] are loaded as <4 x i32> and reduced with
;            llvm.vector.reduce.smax.v4i32; @arr[6], @arr[7] and the
;            cross-block select are combined with scalar icmp/select.
;   THRESH - as AVX, plus a <2 x i32> load of @arr[0..1] in the entry block and
;            a <2 x i32> select that is extracted and compared in scalar form.
825define i32 @maxi8_wrong_parent(i32) {
826; SSE-LABEL: @maxi8_wrong_parent(
827; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
828; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
829; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
830; SSE-NEXT:    br label [[PP:%.*]]
831; SSE:       pp:
832; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
833; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
834; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
835; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
836; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
837; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
838; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
839; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
840; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
841; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
842; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
843; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
844; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
845; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
846; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
847; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
848; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
849; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
850; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
851; SSE-NEXT:    ret i32 [[TMP23]]
852;
853; AVX-LABEL: @maxi8_wrong_parent(
854; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
855; AVX-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
856; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
857; AVX-NEXT:    br label [[PP:%.*]]
858; AVX:       pp:
859; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
860; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
861; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
862; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
863; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
864; AVX-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
865; AVX-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
866; AVX-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
867; AVX-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
868; AVX-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
869; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
870; AVX-NEXT:    ret i32 [[OP_EXTRA]]
871;
872; THRESH-LABEL: @maxi8_wrong_parent(
873; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
874; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
875; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
876; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
877; THRESH-NEXT:    br label [[PP:%.*]]
878; THRESH:       pp:
879; THRESH-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
880; THRESH-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
881; THRESH-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
882; THRESH-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
883; THRESH-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
884; THRESH-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
885; THRESH-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
886; THRESH-NEXT:    [[TMP13:%.*]] = insertelement <2 x i1> undef, i1 [[TMP12]], i32 0
887; THRESH-NEXT:    [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1
888; THRESH-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i32 0
889; THRESH-NEXT:    [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1
890; THRESH-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> undef, i32 [[TMP8]], i32 0
891; THRESH-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
892; THRESH-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
893; THRESH-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
894; THRESH-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
895; THRESH-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
896; THRESH-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
897; THRESH-NEXT:    ret i32 [[OP_EXTRA]]
898;
; Entry block: load @arr[0] and @arr[1] and compare them; the matching select
; is deliberately placed after the unconditional branch, in %pp.
899  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
900  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
901  %4 = icmp sgt i32 %2, %3
902  br label %pp
903

904pp:
; %5 consumes the compare from the entry block; the remaining compare/select
; pairs fold @arr[2] through @arr[7] into the running maximum.
905  %5 = select i1 %4, i32 %2, i32 %3
906  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
907  %7 = icmp sgt i32 %5, %6
908  %8 = select i1 %7, i32 %5, i32 %6
909  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
910  %10 = icmp sgt i32 %8, %9
911  %11 = select i1 %10, i32 %8, i32 %9
912  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
913  %13 = icmp sgt i32 %11, %12
914  %14 = select i1 %13, i32 %11, i32 %12
915  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
916  %16 = icmp sgt i32 %14, %15
917  %17 = select i1 %16, i32 %14, i32 %15
918  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
919  %19 = icmp sgt i32 %17, %18
920  %20 = select i1 %19, i32 %17, i32 %18
921  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
922  %22 = icmp sgt i32 %20, %21
923  %23 = select i1 %22, i32 %20, i32 %21
924  ret i32 %23
925}
926
927; PR38191 - We don't handle array-of-pointer reductions.
; Unsigned-max (icmp ugt + select) chain over the eight i32* values stored in
; @arrp[0..7]; the result is returned as an i32*. The unnamed i32 argument is
; not used by the body.
; Expected output per check prefix (see the RUN lines at the top of the file):
;   DEFAULT - the scalar load/compare/select chain is left as-is.
;   THRESH  - only the first two loads are merged into a <2 x i32*> load whose
;             lanes are extracted again; the compare/select chain stays scalar.
928define i32* @maxp8(i32) {
929; DEFAULT-LABEL: @maxp8(
930; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
931; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
932; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]]
933; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]]
934; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
935; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]]
936; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]]
937; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
938; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]]
939; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]]
940; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
941; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]]
942; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]]
943; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
944; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]]
945; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]]
946; DEFAULT-NEXT:    [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
947; DEFAULT-NEXT:    [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]]
948; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]]
949; DEFAULT-NEXT:    [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
950; DEFAULT-NEXT:    [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]]
951; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]]
952; DEFAULT-NEXT:    ret i32* [[TMP23]]
953;
954; THRESH-LABEL: @maxp8(
955; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32*>, <2 x i32*>* bitcast ([32 x i32*]* @arrp to <2 x i32*>*), align 16
956; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 0
957; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 1
958; THRESH-NEXT:    [[TMP5:%.*]] = icmp ugt i32* [[TMP3]], [[TMP4]]
959; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32* [[TMP3]], i32* [[TMP4]]
960; THRESH-NEXT:    [[TMP7:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
961; THRESH-NEXT:    [[TMP8:%.*]] = icmp ugt i32* [[TMP6]], [[TMP7]]
962; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32* [[TMP6]], i32* [[TMP7]]
963; THRESH-NEXT:    [[TMP10:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
964; THRESH-NEXT:    [[TMP11:%.*]] = icmp ugt i32* [[TMP9]], [[TMP10]]
965; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32* [[TMP9]], i32* [[TMP10]]
966; THRESH-NEXT:    [[TMP13:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
967; THRESH-NEXT:    [[TMP14:%.*]] = icmp ugt i32* [[TMP12]], [[TMP13]]
968; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32* [[TMP12]], i32* [[TMP13]]
969; THRESH-NEXT:    [[TMP16:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
970; THRESH-NEXT:    [[TMP17:%.*]] = icmp ugt i32* [[TMP15]], [[TMP16]]
971; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32* [[TMP15]], i32* [[TMP16]]
972; THRESH-NEXT:    [[TMP19:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
973; THRESH-NEXT:    [[TMP20:%.*]] = icmp ugt i32* [[TMP18]], [[TMP19]]
974; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32* [[TMP18]], i32* [[TMP19]]
975; THRESH-NEXT:    [[TMP22:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
976; THRESH-NEXT:    [[TMP23:%.*]] = icmp ugt i32* [[TMP21]], [[TMP22]]
977; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32* [[TMP21]], i32* [[TMP22]]
978; THRESH-NEXT:    ret i32* [[TMP24]]
979;
; Each load/icmp ugt/select triple below folds the next pointer from @arrp into
; the running maximum, comparing the pointer values as unsigned integers.
980  %2 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
981  %3 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
982  %4 = icmp ugt i32* %2, %3
983  %5 = select i1 %4, i32* %2, i32* %3
984  %6 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
985  %7 = icmp ugt i32* %5, %6
986  %8 = select i1 %7, i32* %5, i32* %6
987  %9 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
988  %10 = icmp ugt i32* %8, %9
989  %11 = select i1 %10, i32* %8, i32* %9
990  %12 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
991  %13 = icmp ugt i32* %11, %12
992  %14 = select i1 %13, i32* %11, i32* %12
993  %15 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
994  %16 = icmp ugt i32* %14, %15
995  %17 = select i1 %16, i32* %14, i32* %15
996  %18 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
997  %19 = icmp ugt i32* %17, %18
998  %20 = select i1 %19, i32* %17, i32* %18
999  %21 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
1000  %22 = icmp ugt i32* %20, %21
1001  %23 = select i1 %22, i32* %20, i32* %21
1002  ret i32* %23
1003}
1004