1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -slp-vectorizer -S %s | FileCheck %s
3; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s
4
; Module-level target configuration: the data layout string and the target
; triple (arm64-apple-ios5.0.0) that the functions below are compiled against.
5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6target triple = "arm64-apple-ios5.0.0"
7
; Scalar unsigned-min pattern over 8 adjacent i16 elements starting at %ptr.
; Each element is loaded, compared with `icmp ult` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <8 x i16> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
8define void @select_umin_8xi16(i16* %ptr, i16 %x) {
9; CHECK-LABEL: @select_umin_8xi16(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
12; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
13; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
14; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
15; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
16; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
17; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
18; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
19; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
20; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
21; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
22; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
23; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
24; CHECK-NEXT:    ret void
25;
26entry:
27  %l.0 = load i16, i16* %ptr
28  %cmp.0 = icmp ult i16 %l.0, 16383
29  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
30  store i16 %s.0, i16* %ptr, align 2
31
32  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
33  %l.1 = load i16, i16* %gep.1
34  %cmp.1 = icmp ult i16 %l.1, 16383
35  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
36  store i16 %s.1, i16* %gep.1, align 2
37
38  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
39  %l.2 = load i16, i16* %gep.2
40  %cmp.2 = icmp ult i16 %l.2, 16383
41  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
42  store i16 %s.2, i16* %gep.2, align 2
43
44  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
45  %l.3 = load i16, i16* %gep.3
46  %cmp.3 = icmp ult i16 %l.3, 16383
47  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
48  store i16 %s.3, i16* %gep.3, align 2
49
50  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
51  %l.4 = load i16, i16* %gep.4
52  %cmp.4 = icmp ult i16 %l.4, 16383
53  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
54  store i16 %s.4, i16* %gep.4, align 2
55
56  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
57  %l.5 = load i16, i16* %gep.5
58  %cmp.5 = icmp ult i16 %l.5, 16383
59  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
60  store i16 %s.5, i16* %gep.5, align 2
61
62  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
63  %l.6 = load i16, i16* %gep.6
64  %cmp.6 = icmp ult i16 %l.6, 16383
65  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
66  store i16 %s.6, i16* %gep.6, align 2
67
68  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
69  %l.7 = load i16, i16* %gep.7
70  %cmp.7 = icmp ult i16 %l.7, 16383
71  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
72  store i16 %s.7, i16* %gep.7, align 2
73  ret void
74}
75
; Scalar unsigned-min pattern over 4 adjacent i32 elements starting at %ptr.
; Each element is loaded, compared with `icmp ult` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <4 x i32> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
76define void @select_umin_4xi32(i32* %ptr, i32 %x) {
77; CHECK-LABEL: @select_umin_4xi32(
78; CHECK-NEXT:  entry:
79; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
80; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
81; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
82; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
83; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
84; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
85; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
86; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
87; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
88; CHECK-NEXT:    ret void
89;
90entry:
91  %l.0 = load i32, i32* %ptr
92  %cmp.0 = icmp ult i32 %l.0, 16383
93  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
94  store i32 %s.0, i32* %ptr, align 4
95
96  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
97  %l.1 = load i32, i32* %gep.1
98  %cmp.1 = icmp ult i32 %l.1, 16383
99  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
100  store i32 %s.1, i32* %gep.1, align 4
101
102  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
103  %l.2 = load i32, i32* %gep.2
104  %cmp.2 = icmp ult i32 %l.2, 16383
105  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
106  store i32 %s.2, i32* %gep.2, align 4
107
108  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
109  %l.3 = load i32, i32* %gep.3
110  %cmp.3 = icmp ult i32 %l.3, 16383
111  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
112  store i32 %s.3, i32* %gep.3, align 4
113
114  ret void
115}
116
; Negative test: 4 adjacent i32 elements whose compare predicates are not
; uniform. Elements 0 and 2 use `icmp ult`, elements 1 and 3 use `icmp ugt`;
; every lane selects the loaded value when its compare is true and 16383
; otherwise, then stores the result back in place.
; The assertions below expect the scalar instructions to be left as they are
; (no vector load/icmp/select/store is expected).
; NOTE(review): the function name says "ule", but the body uses `ult` for the
; non-`ugt` lanes.
; The %x argument is not referenced by the body.
117define void @select_ule_ugt_mix_4xi32(i32* %ptr, i32 %x) {
118; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
119; CHECK-NEXT:  entry:
120; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
121; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
122; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
123; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
124; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
125; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
126; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
127; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
128; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
129; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
130; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
131; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
132; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
133; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
134; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
135; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
136; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
137; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
138; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
139; CHECK-NEXT:    ret void
140;
141entry:
142  %l.0 = load i32, i32* %ptr
143  %cmp.0 = icmp ult i32 %l.0, 16383
144  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
145  store i32 %s.0, i32* %ptr, align 4
146
147  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
148  %l.1 = load i32, i32* %gep.1
149  %cmp.1 = icmp ugt i32 %l.1, 16383
150  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
151  store i32 %s.1, i32* %gep.1, align 4
152
153  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
154  %l.2 = load i32, i32* %gep.2
155  %cmp.2 = icmp ult i32 %l.2, 16383
156  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
157  store i32 %s.2, i32* %gep.2, align 4
158
159  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
160  %l.3 = load i32, i32* %gep.3
161  %cmp.3 = icmp ugt i32 %l.3, 16383
162  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
163  store i32 %s.3, i32* %gep.3, align 4
164
165  ret void
166}
167
168; There is no <2 x i64> version of umin, but we can efficiently lower
169; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared with
; `icmp ult` against 16383, replaced through `select` (the loaded value when
; the compare is true, 16383 otherwise) and stored back in place.
; The assertions below expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while both scalar stores are
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
170define void @select_umin_2xi64(i64* %ptr, i64 %x) {
171; CHECK-LABEL: @select_umin_2xi64(
172; CHECK-NEXT:  entry:
173; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
174; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
175; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
176; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
177; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
178; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
179; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
180; CHECK-NEXT:    ret void
181;
182entry:
183  %l.0 = load i64, i64* %ptr
184  %cmp.0 = icmp ult i64 %l.0, 16383
185  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
186  store i64 %s.0, i64* %ptr, align 4
187
188  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
189  %l.1 = load i64, i64* %gep.1
190  %cmp.1 = icmp ult i64 %l.1, 16383
191  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
192  store i64 %s.1, i64* %gep.1, align 4
193
194  ret void
195}
196
197
; Unsigned-min pattern written with the non-strict predicate: 8 adjacent i16
; elements starting at %ptr are each loaded, compared with `icmp ule` against
; 16383, replaced through `select` (the loaded value when the compare is true,
; 16383 otherwise) and stored back in place.
; The assertions below expect one <8 x i16> load/icmp/select/store sequence
; that keeps the `ule` predicate.
; The %x argument is not referenced by the body.
198define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
199; CHECK-LABEL: @select_umin_ule_8xi16(
200; CHECK-NEXT:  entry:
201; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
202; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
203; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
204; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
205; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
206; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
207; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
208; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
209; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
210; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
211; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
212; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
213; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
214; CHECK-NEXT:    ret void
215;
216entry:
217  %l.0 = load i16, i16* %ptr
218  %cmp.0 = icmp ule i16 %l.0, 16383
219  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
220  store i16 %s.0, i16* %ptr, align 2
221
222  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
223  %l.1 = load i16, i16* %gep.1
224  %cmp.1 = icmp ule i16 %l.1, 16383
225  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
226  store i16 %s.1, i16* %gep.1, align 2
227
228  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
229  %l.2 = load i16, i16* %gep.2
230  %cmp.2 = icmp ule i16 %l.2, 16383
231  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
232  store i16 %s.2, i16* %gep.2, align 2
233
234  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
235  %l.3 = load i16, i16* %gep.3
236  %cmp.3 = icmp ule i16 %l.3, 16383
237  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
238  store i16 %s.3, i16* %gep.3, align 2
239
240  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
241  %l.4 = load i16, i16* %gep.4
242  %cmp.4 = icmp ule i16 %l.4, 16383
243  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
244  store i16 %s.4, i16* %gep.4, align 2
245
246  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
247  %l.5 = load i16, i16* %gep.5
248  %cmp.5 = icmp ule i16 %l.5, 16383
249  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
250  store i16 %s.5, i16* %gep.5, align 2
251
252  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
253  %l.6 = load i16, i16* %gep.6
254  %cmp.6 = icmp ule i16 %l.6, 16383
255  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
256  store i16 %s.6, i16* %gep.6, align 2
257
258  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
259  %l.7 = load i16, i16* %gep.7
260  %cmp.7 = icmp ule i16 %l.7, 16383
261  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
262  store i16 %s.7, i16* %gep.7, align 2
263  ret void
264}
265
; Unsigned-min pattern written with the non-strict predicate: 4 adjacent i32
; elements starting at %ptr are each loaded, compared with `icmp ule` against
; 16383, replaced through `select` (the loaded value when the compare is true,
; 16383 otherwise) and stored back in place.
; The assertions below expect one <4 x i32> load/icmp/select/store sequence
; that keeps the `ule` predicate.
; The %x argument is not referenced by the body.
266define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
267; CHECK-LABEL: @select_umin_ule_4xi32(
268; CHECK-NEXT:  entry:
269; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
270; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
271; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
272; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
273; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
274; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
275; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
276; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
277; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
278; CHECK-NEXT:    ret void
279;
280entry:
281  %l.0 = load i32, i32* %ptr
282  %cmp.0 = icmp ule i32 %l.0, 16383
283  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
284  store i32 %s.0, i32* %ptr, align 4
285
286  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
287  %l.1 = load i32, i32* %gep.1
288  %cmp.1 = icmp ule i32 %l.1, 16383
289  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
290  store i32 %s.1, i32* %gep.1, align 4
291
292  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
293  %l.2 = load i32, i32* %gep.2
294  %cmp.2 = icmp ule i32 %l.2, 16383
295  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
296  store i32 %s.2, i32* %gep.2, align 4
297
298  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
299  %l.3 = load i32, i32* %gep.3
300  %cmp.3 = icmp ule i32 %l.3, 16383
301  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
302  store i32 %s.3, i32* %gep.3, align 4
303
304  ret void
305}
306
307; There is no <2 x i64> version of umin, but we can efficiently lower
308; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared with
; `icmp ule` against 16383, replaced through `select` (the loaded value when
; the compare is true, 16383 otherwise) and stored back in place.
; The assertions below expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while both scalar stores are
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
309define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
310; CHECK-LABEL: @select_umin_ule_2xi64(
311; CHECK-NEXT:  entry:
312; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
313; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
314; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
315; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
316; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
317; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
318; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
319; CHECK-NEXT:    ret void
320;
321entry:
322  %l.0 = load i64, i64* %ptr
323  %cmp.0 = icmp ule i64 %l.0, 16383
324  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
325  store i64 %s.0, i64* %ptr, align 4
326
327  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
328  %l.1 = load i64, i64* %gep.1
329  %cmp.1 = icmp ule i64 %l.1, 16383
330  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
331  store i64 %s.1, i64* %gep.1, align 4
332
333  ret void
334}
335
; Scalar signed-min pattern over 8 adjacent i16 elements starting at %ptr.
; Each element is loaded, compared with `icmp slt` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <8 x i16> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
336define void @select_smin_8xi16(i16* %ptr, i16 %x) {
337; CHECK-LABEL: @select_smin_8xi16(
338; CHECK-NEXT:  entry:
339; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
340; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
341; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
342; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
343; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
344; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
345; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
346; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
347; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
348; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
349; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
350; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
351; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
352; CHECK-NEXT:    ret void
353;
354entry:
355  %l.0 = load i16, i16* %ptr
356  %cmp.0 = icmp slt i16 %l.0, 16383
357  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
358  store i16 %s.0, i16* %ptr, align 2
359
360  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
361  %l.1 = load i16, i16* %gep.1
362  %cmp.1 = icmp slt i16 %l.1, 16383
363  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
364  store i16 %s.1, i16* %gep.1, align 2
365
366  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
367  %l.2 = load i16, i16* %gep.2
368  %cmp.2 = icmp slt i16 %l.2, 16383
369  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
370  store i16 %s.2, i16* %gep.2, align 2
371
372  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
373  %l.3 = load i16, i16* %gep.3
374  %cmp.3 = icmp slt i16 %l.3, 16383
375  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
376  store i16 %s.3, i16* %gep.3, align 2
377
378  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
379  %l.4 = load i16, i16* %gep.4
380  %cmp.4 = icmp slt i16 %l.4, 16383
381  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
382  store i16 %s.4, i16* %gep.4, align 2
383
384  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
385  %l.5 = load i16, i16* %gep.5
386  %cmp.5 = icmp slt i16 %l.5, 16383
387  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
388  store i16 %s.5, i16* %gep.5, align 2
389
390  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
391  %l.6 = load i16, i16* %gep.6
392  %cmp.6 = icmp slt i16 %l.6, 16383
393  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
394  store i16 %s.6, i16* %gep.6, align 2
395
396  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
397  %l.7 = load i16, i16* %gep.7
398  %cmp.7 = icmp slt i16 %l.7, 16383
399  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
400  store i16 %s.7, i16* %gep.7, align 2
401  ret void
402}
403
; Scalar signed-min pattern over 4 adjacent i32 elements starting at %ptr.
; Each element is loaded, compared with `icmp slt` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <4 x i32> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
404define void @select_smin_4xi32(i32* %ptr, i32 %x) {
405; CHECK-LABEL: @select_smin_4xi32(
406; CHECK-NEXT:  entry:
407; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
408; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
409; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
410; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
411; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
412; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
413; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
414; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
415; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
416; CHECK-NEXT:    ret void
417;
418entry:
419  %l.0 = load i32, i32* %ptr
420  %cmp.0 = icmp slt i32 %l.0, 16383
421  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
422  store i32 %s.0, i32* %ptr, align 4
423
424  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
425  %l.1 = load i32, i32* %gep.1
426  %cmp.1 = icmp slt i32 %l.1, 16383
427  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
428  store i32 %s.1, i32* %gep.1, align 4
429
430  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
431  %l.2 = load i32, i32* %gep.2
432  %cmp.2 = icmp slt i32 %l.2, 16383
433  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
434  store i32 %s.2, i32* %gep.2, align 4
435
436  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
437  %l.3 = load i32, i32* %gep.3
438  %cmp.3 = icmp slt i32 %l.3, 16383
439  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
440  store i32 %s.3, i32* %gep.3, align 4
441
442  ret void
443}
444
445; There is no <2 x i64> version of smin, but we can efficiently lower
446; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared with
; `icmp slt` against 16383, replaced through `select` (the loaded value when
; the compare is true, 16383 otherwise) and stored back in place.
; The assertions below expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while both scalar stores are
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
447define void @select_smin_2xi64(i64* %ptr, i64 %x) {
448; CHECK-LABEL: @select_smin_2xi64(
449; CHECK-NEXT:  entry:
450; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
451; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
452; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
453; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
454; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
455; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
456; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
457; CHECK-NEXT:    ret void
458;
459entry:
460  %l.0 = load i64, i64* %ptr
461  %cmp.0 = icmp slt i64 %l.0, 16383
462  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
463  store i64 %s.0, i64* %ptr, align 4
464
465  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
466  %l.1 = load i64, i64* %gep.1
467  %cmp.1 = icmp slt i64 %l.1, 16383
468  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
469  store i64 %s.1, i64* %gep.1, align 4
470
471  ret void
472}
473
; Signed-min pattern written with the non-strict predicate: 8 adjacent i16
; elements starting at %ptr are each loaded, compared with `icmp sle` against
; 16383, replaced through `select` (the loaded value when the compare is true,
; 16383 otherwise) and stored back in place.
; The assertions below expect one <8 x i16> load/icmp/select/store sequence
; that keeps the `sle` predicate.
; The %x argument is not referenced by the body.
474define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
475; CHECK-LABEL: @select_smin_sle_8xi16(
476; CHECK-NEXT:  entry:
477; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
478; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
479; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
480; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
481; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
482; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
483; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
484; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
485; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
486; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
487; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
488; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
489; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
490; CHECK-NEXT:    ret void
491;
492entry:
493  %l.0 = load i16, i16* %ptr
494  %cmp.0 = icmp sle i16 %l.0, 16383
495  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
496  store i16 %s.0, i16* %ptr, align 2
497
498  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
499  %l.1 = load i16, i16* %gep.1
500  %cmp.1 = icmp sle i16 %l.1, 16383
501  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
502  store i16 %s.1, i16* %gep.1, align 2
503
504  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
505  %l.2 = load i16, i16* %gep.2
506  %cmp.2 = icmp sle i16 %l.2, 16383
507  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
508  store i16 %s.2, i16* %gep.2, align 2
509
510  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
511  %l.3 = load i16, i16* %gep.3
512  %cmp.3 = icmp sle i16 %l.3, 16383
513  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
514  store i16 %s.3, i16* %gep.3, align 2
515
516  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
517  %l.4 = load i16, i16* %gep.4
518  %cmp.4 = icmp sle i16 %l.4, 16383
519  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
520  store i16 %s.4, i16* %gep.4, align 2
521
522  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
523  %l.5 = load i16, i16* %gep.5
524  %cmp.5 = icmp sle i16 %l.5, 16383
525  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
526  store i16 %s.5, i16* %gep.5, align 2
527
528  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
529  %l.6 = load i16, i16* %gep.6
530  %cmp.6 = icmp sle i16 %l.6, 16383
531  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
532  store i16 %s.6, i16* %gep.6, align 2
533
534  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
535  %l.7 = load i16, i16* %gep.7
536  %cmp.7 = icmp sle i16 %l.7, 16383
537  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
538  store i16 %s.7, i16* %gep.7, align 2
539  ret void
540}
541
; Signed-min pattern written with the non-strict predicate: 4 adjacent i32
; elements starting at %ptr are each loaded, compared with `icmp sle` against
; 16383, replaced through `select` (the loaded value when the compare is true,
; 16383 otherwise) and stored back in place.
; The assertions below expect one <4 x i32> load/icmp/select/store sequence
; that keeps the `sle` predicate.
; The %x argument is not referenced by the body.
542define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
543; CHECK-LABEL: @select_smin_sle_4xi32(
544; CHECK-NEXT:  entry:
545; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
546; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
547; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
548; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
549; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
550; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
551; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
552; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
553; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
554; CHECK-NEXT:    ret void
555;
556entry:
557  %l.0 = load i32, i32* %ptr
558  %cmp.0 = icmp sle i32 %l.0, 16383
559  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
560  store i32 %s.0, i32* %ptr, align 4
561
562  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
563  %l.1 = load i32, i32* %gep.1
564  %cmp.1 = icmp sle i32 %l.1, 16383
565  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
566  store i32 %s.1, i32* %gep.1, align 4
567
568  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
569  %l.2 = load i32, i32* %gep.2
570  %cmp.2 = icmp sle i32 %l.2, 16383
571  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
572  store i32 %s.2, i32* %gep.2, align 4
573
574  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
575  %l.3 = load i32, i32* %gep.3
576  %cmp.3 = icmp sle i32 %l.3, 16383
577  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
578  store i32 %s.3, i32* %gep.3, align 4
579
580  ret void
581}
582
583; There is no <2 x i64> version of smin, but we can efficiently lower
584; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared with
; `icmp sle` against 16383, replaced through `select` (the loaded value when
; the compare is true, 16383 otherwise) and stored back in place.
; The assertions below expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while both scalar stores are
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
585define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
586; CHECK-LABEL: @select_smin_sle_2xi64(
587; CHECK-NEXT:  entry:
588; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
589; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
590; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
591; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
592; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
593; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
594; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
595; CHECK-NEXT:    ret void
596;
597entry:
598  %l.0 = load i64, i64* %ptr
599  %cmp.0 = icmp sle i64 %l.0, 16383
600  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
601  store i64 %s.0, i64* %ptr, align 4
602
603  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
604  %l.1 = load i64, i64* %gep.1
605  %cmp.1 = icmp sle i64 %l.1, 16383
606  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
607  store i64 %s.1, i64* %gep.1, align 4
608
609  ret void
610}
; Scalar unsigned-max pattern over 8 adjacent i16 elements starting at %ptr.
; Each element is loaded, compared with `icmp ugt` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <8 x i16> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
611define void @select_umax_8xi16(i16* %ptr, i16 %x) {
612; CHECK-LABEL: @select_umax_8xi16(
613; CHECK-NEXT:  entry:
614; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
615; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
616; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
617; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
618; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
619; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
620; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
621; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
622; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
623; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
624; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
625; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
626; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
627; CHECK-NEXT:    ret void
628;
629entry:
630  %l.0 = load i16, i16* %ptr
631  %cmp.0 = icmp ugt i16 %l.0, 16383
632  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
633  store i16 %s.0, i16* %ptr, align 2
634
635  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
636  %l.1 = load i16, i16* %gep.1
637  %cmp.1 = icmp ugt i16 %l.1, 16383
638  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
639  store i16 %s.1, i16* %gep.1, align 2
640
641  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
642  %l.2 = load i16, i16* %gep.2
643  %cmp.2 = icmp ugt i16 %l.2, 16383
644  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
645  store i16 %s.2, i16* %gep.2, align 2
646
647  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
648  %l.3 = load i16, i16* %gep.3
649  %cmp.3 = icmp ugt i16 %l.3, 16383
650  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
651  store i16 %s.3, i16* %gep.3, align 2
652
653  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
654  %l.4 = load i16, i16* %gep.4
655  %cmp.4 = icmp ugt i16 %l.4, 16383
656  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
657  store i16 %s.4, i16* %gep.4, align 2
658
659  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
660  %l.5 = load i16, i16* %gep.5
661  %cmp.5 = icmp ugt i16 %l.5, 16383
662  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
663  store i16 %s.5, i16* %gep.5, align 2
664
665  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
666  %l.6 = load i16, i16* %gep.6
667  %cmp.6 = icmp ugt i16 %l.6, 16383
668  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
669  store i16 %s.6, i16* %gep.6, align 2
670
671  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
672  %l.7 = load i16, i16* %gep.7
673  %cmp.7 = icmp ugt i16 %l.7, 16383
674  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
675  store i16 %s.7, i16* %gep.7, align 2
676  ret void
677}
678
; Scalar unsigned-max pattern over 4 adjacent i32 elements starting at %ptr.
; Each element is loaded, compared with `icmp ugt` against the constant 16383,
; replaced through `select` (the loaded value when the compare is true, 16383
; otherwise) and stored back to the address it was loaded from.
; The assertions below expect one <4 x i32> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
679define void @select_umax_4xi32(i32* %ptr, i32 %x) {
680; CHECK-LABEL: @select_umax_4xi32(
681; CHECK-NEXT:  entry:
682; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
683; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
684; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
685; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
686; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
687; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
688; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
689; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
690; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
691; CHECK-NEXT:    ret void
692;
693entry:
694  %l.0 = load i32, i32* %ptr
695  %cmp.0 = icmp ugt i32 %l.0, 16383
696  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
697  store i32 %s.0, i32* %ptr, align 4
698
699  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
700  %l.1 = load i32, i32* %gep.1
701  %cmp.1 = icmp ugt i32 %l.1, 16383
702  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
703  store i32 %s.1, i32* %gep.1, align 4
704
705  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
706  %l.2 = load i32, i32* %gep.2
707  %cmp.2 = icmp ugt i32 %l.2, 16383
708  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
709  store i32 %s.2, i32* %gep.2, align 4
710
711  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
712  %l.3 = load i32, i32* %gep.3
713  %cmp.3 = icmp ugt i32 %l.3, 16383
714  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
715  store i32 %s.3, i32* %gep.3, align 4
716
717  ret void
718}
719
720; There is no <2 x i64> version of umax, but we can efficiently lower
721; compare/select pairs with uniform predicates.
; Test: unsigned max against the constant 16383 over two adjacent i64
; elements starting at %ptr, using `icmp ugt` + select in each lane. The
; CHECK lines expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while the scalar stores say
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): the `align 8` presumably comes from the i64:64 entry in the
; target datalayout -- confirm.
; The %x argument is not referenced by the body.
722define void @select_umax_2xi64(i64* %ptr, i64 %x) {
723; CHECK-LABEL: @select_umax_2xi64(
724; CHECK-NEXT:  entry:
725; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
726; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
727; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
728; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
729; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
730; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
731; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
732; CHECK-NEXT:    ret void
733;
734entry:
; Lane 0 operates directly on %ptr.
735  %l.0 = load i64, i64* %ptr
736  %cmp.0 = icmp ugt i64 %l.0, 16383
737  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
738  store i64 %s.0, i64* %ptr, align 4
739
; Lane 1 repeats the same pattern one element past %ptr.
740  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
741  %l.1 = load i64, i64* %gep.1
742  %cmp.1 = icmp ugt i64 %l.1, 16383
743  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
744  store i64 %s.1, i64* %gep.1, align 4
745
746  ret void
747}
748
; Test: unsigned max against the constant 16383 over eight adjacent i16
; elements starting at %ptr, written with the non-strict predicate
; `icmp uge`. Each lane loads a value, compares it, selects the loaded value
; when the compare is true (16383 otherwise), and stores the result back to
; the address it was loaded from. The CHECK lines expect one <8 x i16>
; load/icmp/select/store sequence that keeps the `uge` predicate.
; The %x argument is not referenced by the body.
749define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
750; CHECK-LABEL: @select_umax_uge_8xi16(
751; CHECK-NEXT:  entry:
752; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
753; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
754; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
755; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
756; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
757; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
758; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
759; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
760; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
761; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
762; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
763; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
764; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
765; CHECK-NEXT:    ret void
766;
767entry:
; Lane 0 operates directly on %ptr.
768  %l.0 = load i16, i16* %ptr
769  %cmp.0 = icmp uge i16 %l.0, 16383
770  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
771  store i16 %s.0, i16* %ptr, align 2
772
; Lanes 1-7 repeat the same pattern at element offsets 1 through 7 from %ptr.
773  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
774  %l.1 = load i16, i16* %gep.1
775  %cmp.1 = icmp uge i16 %l.1, 16383
776  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
777  store i16 %s.1, i16* %gep.1, align 2
778
779  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
780  %l.2 = load i16, i16* %gep.2
781  %cmp.2 = icmp uge i16 %l.2, 16383
782  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
783  store i16 %s.2, i16* %gep.2, align 2
784
785  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
786  %l.3 = load i16, i16* %gep.3
787  %cmp.3 = icmp uge i16 %l.3, 16383
788  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
789  store i16 %s.3, i16* %gep.3, align 2
790
791  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
792  %l.4 = load i16, i16* %gep.4
793  %cmp.4 = icmp uge i16 %l.4, 16383
794  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
795  store i16 %s.4, i16* %gep.4, align 2
796
797  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
798  %l.5 = load i16, i16* %gep.5
799  %cmp.5 = icmp uge i16 %l.5, 16383
800  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
801  store i16 %s.5, i16* %gep.5, align 2
802
803  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
804  %l.6 = load i16, i16* %gep.6
805  %cmp.6 = icmp uge i16 %l.6, 16383
806  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
807  store i16 %s.6, i16* %gep.6, align 2
808
809  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
810  %l.7 = load i16, i16* %gep.7
811  %cmp.7 = icmp uge i16 %l.7, 16383
812  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
813  store i16 %s.7, i16* %gep.7, align 2
814  ret void
815}
816
; Test: unsigned max against the constant 16383 over four adjacent i32
; elements starting at %ptr, written with the non-strict predicate
; `icmp uge`. Each lane loads, compares, selects the loaded value when the
; compare is true (16383 otherwise), and stores back in place. The CHECK
; lines expect one <4 x i32> load/icmp/select/store sequence that keeps the
; `uge` predicate.
; The %x argument is not referenced by the body.
817define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
818; CHECK-LABEL: @select_umax_uge_4xi32(
819; CHECK-NEXT:  entry:
820; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
821; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
822; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
823; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
824; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
825; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
826; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
827; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
828; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
829; CHECK-NEXT:    ret void
830;
831entry:
; Lane 0 operates directly on %ptr.
832  %l.0 = load i32, i32* %ptr
833  %cmp.0 = icmp uge i32 %l.0, 16383
834  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
835  store i32 %s.0, i32* %ptr, align 4
836
; Lanes 1-3 repeat the same pattern at element offsets 1, 2 and 3 from %ptr.
837  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
838  %l.1 = load i32, i32* %gep.1
839  %cmp.1 = icmp uge i32 %l.1, 16383
840  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
841  store i32 %s.1, i32* %gep.1, align 4
842
843  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
844  %l.2 = load i32, i32* %gep.2
845  %cmp.2 = icmp uge i32 %l.2, 16383
846  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
847  store i32 %s.2, i32* %gep.2, align 4
848
849  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
850  %l.3 = load i32, i32* %gep.3
851  %cmp.3 = icmp uge i32 %l.3, 16383
852  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
853  store i32 %s.3, i32* %gep.3, align 4
854
855  ret void
856}
857
858; There is no <2 x i64> version of umax, but we can efficiently lower
859; compare/select pairs with uniform predicates.
; Test: unsigned max against the constant 16383 over two adjacent i64
; elements starting at %ptr, using `icmp uge` + select in each lane. The
; CHECK lines expect one <2 x i64> load/icmp/select/store sequence that keeps
; the `uge` predicate.
; The scalar loads carry no explicit alignment while the scalar stores say
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): the `align 8` presumably comes from the i64:64 entry in the
; target datalayout -- confirm.
; The %x argument is not referenced by the body.
860define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
861; CHECK-LABEL: @select_umax_uge_2xi64(
862; CHECK-NEXT:  entry:
863; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
864; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
865; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
866; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
867; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
868; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
869; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
870; CHECK-NEXT:    ret void
871;
872entry:
; Lane 0 operates directly on %ptr.
873  %l.0 = load i64, i64* %ptr
874  %cmp.0 = icmp uge i64 %l.0, 16383
875  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
876  store i64 %s.0, i64* %ptr, align 4
877
; Lane 1 repeats the same pattern one element past %ptr.
878  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
879  %l.1 = load i64, i64* %gep.1
880  %cmp.1 = icmp uge i64 %l.1, 16383
881  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
882  store i64 %s.1, i64* %gep.1, align 4
883
884  ret void
885}
886
; Test: signed max against the constant 16383 over eight adjacent i16
; elements starting at %ptr. Each lane loads a value, compares it with
; `icmp sgt`, selects the loaded value when the compare is true (16383
; otherwise), and stores the result back to the address it was loaded from.
; The CHECK lines expect one <8 x i16> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
887define void @select_smax_8xi16(i16* %ptr, i16 %x) {
888; CHECK-LABEL: @select_smax_8xi16(
889; CHECK-NEXT:  entry:
890; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
891; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
892; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
893; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
894; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
895; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
896; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
897; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
898; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
899; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
900; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
901; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
902; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
903; CHECK-NEXT:    ret void
904;
905entry:
; Lane 0 operates directly on %ptr.
906  %l.0 = load i16, i16* %ptr
907  %cmp.0 = icmp sgt i16 %l.0, 16383
908  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
909  store i16 %s.0, i16* %ptr, align 2
910
; Lanes 1-7 repeat the same pattern at element offsets 1 through 7 from %ptr.
911  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
912  %l.1 = load i16, i16* %gep.1
913  %cmp.1 = icmp sgt i16 %l.1, 16383
914  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
915  store i16 %s.1, i16* %gep.1, align 2
916
917  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
918  %l.2 = load i16, i16* %gep.2
919  %cmp.2 = icmp sgt i16 %l.2, 16383
920  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
921  store i16 %s.2, i16* %gep.2, align 2
922
923  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
924  %l.3 = load i16, i16* %gep.3
925  %cmp.3 = icmp sgt i16 %l.3, 16383
926  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
927  store i16 %s.3, i16* %gep.3, align 2
928
929  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
930  %l.4 = load i16, i16* %gep.4
931  %cmp.4 = icmp sgt i16 %l.4, 16383
932  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
933  store i16 %s.4, i16* %gep.4, align 2
934
935  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
936  %l.5 = load i16, i16* %gep.5
937  %cmp.5 = icmp sgt i16 %l.5, 16383
938  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
939  store i16 %s.5, i16* %gep.5, align 2
940
941  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
942  %l.6 = load i16, i16* %gep.6
943  %cmp.6 = icmp sgt i16 %l.6, 16383
944  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
945  store i16 %s.6, i16* %gep.6, align 2
946
947  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
948  %l.7 = load i16, i16* %gep.7
949  %cmp.7 = icmp sgt i16 %l.7, 16383
950  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
951  store i16 %s.7, i16* %gep.7, align 2
952  ret void
953}
954
; Test: signed max against the constant 16383 over four adjacent i32
; elements starting at %ptr. Each lane loads a value, compares it with
; `icmp sgt`, selects the loaded value when the compare is true (16383
; otherwise), and stores the result back in place. The CHECK lines expect
; one <4 x i32> load/icmp/select/store sequence.
; The %x argument is not referenced by the body.
955define void @select_smax_4xi32(i32* %ptr, i32 %x) {
956; CHECK-LABEL: @select_smax_4xi32(
957; CHECK-NEXT:  entry:
958; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
959; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
960; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
961; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
962; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
963; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
964; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
965; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
966; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
967; CHECK-NEXT:    ret void
968;
969entry:
; Lane 0 operates directly on %ptr.
970  %l.0 = load i32, i32* %ptr
971  %cmp.0 = icmp sgt i32 %l.0, 16383
972  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
973  store i32 %s.0, i32* %ptr, align 4
974
; Lanes 1-3 repeat the same pattern at element offsets 1, 2 and 3 from %ptr.
975  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
976  %l.1 = load i32, i32* %gep.1
977  %cmp.1 = icmp sgt i32 %l.1, 16383
978  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
979  store i32 %s.1, i32* %gep.1, align 4
980
981  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
982  %l.2 = load i32, i32* %gep.2
983  %cmp.2 = icmp sgt i32 %l.2, 16383
984  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
985  store i32 %s.2, i32* %gep.2, align 4
986
987  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
988  %l.3 = load i32, i32* %gep.3
989  %cmp.3 = icmp sgt i32 %l.3, 16383
990  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
991  store i32 %s.3, i32* %gep.3, align 4
992
993  ret void
994}
995
996; There is no <2 x i64> version of smax, but we can efficiently lower
997; compare/select pairs with uniform predicates.
; Test: signed max against the constant 16383 over two adjacent i64
; elements starting at %ptr, using `icmp sgt` + select in each lane. The
; CHECK lines expect one <2 x i64> load/icmp/select/store sequence.
; The scalar loads carry no explicit alignment while the scalar stores say
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): the `align 8` presumably comes from the i64:64 entry in the
; target datalayout -- confirm.
; The %x argument is not referenced by the body.
998define void @select_smax_2xi64(i64* %ptr, i64 %x) {
999; CHECK-LABEL: @select_smax_2xi64(
1000; CHECK-NEXT:  entry:
1001; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
1002; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1003; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
1004; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
1005; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
1006; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1007; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
1008; CHECK-NEXT:    ret void
1009;
1010entry:
; Lane 0 operates directly on %ptr.
1011  %l.0 = load i64, i64* %ptr
1012  %cmp.0 = icmp sgt i64 %l.0, 16383
1013  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
1014  store i64 %s.0, i64* %ptr, align 4
1015
; Lane 1 repeats the same pattern one element past %ptr.
1016  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
1017  %l.1 = load i64, i64* %gep.1
1018  %cmp.1 = icmp sgt i64 %l.1, 16383
1019  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
1020  store i64 %s.1, i64* %gep.1, align 4
1021
1022  ret void
1023}
1024
1025
; Test: signed max against the constant 16383 over eight adjacent i16
; elements starting at %ptr, written with the non-strict predicate
; `icmp sge`. Each lane loads a value, compares it, selects the loaded value
; when the compare is true (16383 otherwise), and stores the result back to
; the address it was loaded from. The CHECK lines expect one <8 x i16>
; load/icmp/select/store sequence that keeps the `sge` predicate.
; The %x argument is not referenced by the body.
1026define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
1027; CHECK-LABEL: @select_smax_sge_8xi16(
1028; CHECK-NEXT:  entry:
1029; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
1030; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
1031; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
1032; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
1033; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
1034; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
1035; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
1036; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
1037; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
1038; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
1039; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
1040; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
1041; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
1042; CHECK-NEXT:    ret void
1043;
1044entry:
; Lane 0 operates directly on %ptr.
1045  %l.0 = load i16, i16* %ptr
1046  %cmp.0 = icmp sge i16 %l.0, 16383
1047  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
1048  store i16 %s.0, i16* %ptr, align 2
1049
; Lanes 1-7 repeat the same pattern at element offsets 1 through 7 from %ptr.
1050  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
1051  %l.1 = load i16, i16* %gep.1
1052  %cmp.1 = icmp sge i16 %l.1, 16383
1053  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
1054  store i16 %s.1, i16* %gep.1, align 2
1055
1056  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
1057  %l.2 = load i16, i16* %gep.2
1058  %cmp.2 = icmp sge i16 %l.2, 16383
1059  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
1060  store i16 %s.2, i16* %gep.2, align 2
1061
1062  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
1063  %l.3 = load i16, i16* %gep.3
1064  %cmp.3 = icmp sge i16 %l.3, 16383
1065  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
1066  store i16 %s.3, i16* %gep.3, align 2
1067
1068  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
1069  %l.4 = load i16, i16* %gep.4
1070  %cmp.4 = icmp sge i16 %l.4, 16383
1071  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
1072  store i16 %s.4, i16* %gep.4, align 2
1073
1074  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
1075  %l.5 = load i16, i16* %gep.5
1076  %cmp.5 = icmp sge i16 %l.5, 16383
1077  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
1078  store i16 %s.5, i16* %gep.5, align 2
1079
1080  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
1081  %l.6 = load i16, i16* %gep.6
1082  %cmp.6 = icmp sge i16 %l.6, 16383
1083  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
1084  store i16 %s.6, i16* %gep.6, align 2
1085
1086  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
1087  %l.7 = load i16, i16* %gep.7
1088  %cmp.7 = icmp sge i16 %l.7, 16383
1089  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
1090  store i16 %s.7, i16* %gep.7, align 2
1091  ret void
1092}
1093
; Test: signed max against the constant 16383 over four adjacent i32
; elements starting at %ptr, written with the non-strict predicate
; `icmp sge`. Each lane loads, compares, selects the loaded value when the
; compare is true (16383 otherwise), and stores back in place. The CHECK
; lines expect one <4 x i32> load/icmp/select/store sequence that keeps the
; `sge` predicate.
; The %x argument is not referenced by the body.
1094define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
1095; CHECK-LABEL: @select_smax_sge_4xi32(
1096; CHECK-NEXT:  entry:
1097; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
1098; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
1099; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
1100; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
1101; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1102; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
1103; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
1104; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
1105; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
1106; CHECK-NEXT:    ret void
1107;
1108entry:
; Lane 0 operates directly on %ptr.
1109  %l.0 = load i32, i32* %ptr
1110  %cmp.0 = icmp sge i32 %l.0, 16383
1111  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
1112  store i32 %s.0, i32* %ptr, align 4
1113
; Lanes 1-3 repeat the same pattern at element offsets 1, 2 and 3 from %ptr.
1114  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
1115  %l.1 = load i32, i32* %gep.1
1116  %cmp.1 = icmp sge i32 %l.1, 16383
1117  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
1118  store i32 %s.1, i32* %gep.1, align 4
1119
1120  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
1121  %l.2 = load i32, i32* %gep.2
1122  %cmp.2 = icmp sge i32 %l.2, 16383
1123  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
1124  store i32 %s.2, i32* %gep.2, align 4
1125
1126  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
1127  %l.3 = load i32, i32* %gep.3
1128  %cmp.3 = icmp sge i32 %l.3, 16383
1129  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
1130  store i32 %s.3, i32* %gep.3, align 4
1131
1132  ret void
1133}
1134
1135; There is no <2 x i64> version of smax, but we can efficiently lower
1136; compare/select pairs with uniform predicates.
; Test: signed max against the constant 16383 over two adjacent i64
; elements starting at %ptr, using `icmp sge` + select in each lane. The
; CHECK lines expect one <2 x i64> load/icmp/select/store sequence that keeps
; the `sge` predicate.
; The scalar loads carry no explicit alignment while the scalar stores say
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): the `align 8` presumably comes from the i64:64 entry in the
; target datalayout -- confirm.
; The %x argument is not referenced by the body.
1137define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
1138; CHECK-LABEL: @select_smax_sge_2xi64(
1139; CHECK-NEXT:  entry:
1140; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
1141; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1142; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
1143; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
1144; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
1145; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1146; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
1147; CHECK-NEXT:    ret void
1148;
1149entry:
; Lane 0 operates directly on %ptr.
1150  %l.0 = load i64, i64* %ptr
1151  %cmp.0 = icmp sge i64 %l.0, 16383
1152  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
1153  store i64 %s.0, i64* %ptr, align 4
1154
; Lane 1 repeats the same pattern one element past %ptr.
1155  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
1156  %l.1 = load i64, i64* %gep.1
1157  %cmp.1 = icmp sge i64 %l.1, 16383
1158  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
1159  store i64 %s.1, i64* %gep.1, align 4
1160
1161  ret void
1162}
1163