1; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
2
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4target triple = "x86_64-unknown-linux-gnu"
5
6;CHECK-LABEL: @sqrt_f32(
7;CHECK: llvm.sqrt.v4f32
8;CHECK: ret void
; Scalar loop storing llvm.sqrt.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.sqrt.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
29
30declare float @llvm.sqrt.f32(float) nounwind readnone
31
32;CHECK-LABEL: @sqrt_f64(
33;CHECK: llvm.sqrt.v4f64
34;CHECK: ret void
; Scalar loop storing llvm.sqrt.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.sqrt.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
55
56declare double @llvm.sqrt.f64(double) nounwind readnone
57
58;CHECK-LABEL: @sin_f32(
59;CHECK: llvm.sin.v4f32
60;CHECK: ret void
; Scalar loop storing llvm.sin.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.sin.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
81
82declare float @llvm.sin.f32(float) nounwind readnone
83
84;CHECK-LABEL: @sin_f64(
85;CHECK: llvm.sin.v4f64
86;CHECK: ret void
; Scalar loop storing llvm.sin.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.sin.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
107
108declare double @llvm.sin.f64(double) nounwind readnone
109
110;CHECK-LABEL: @cos_f32(
111;CHECK: llvm.cos.v4f32
112;CHECK: ret void
; Scalar loop storing llvm.cos.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.cos.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
133
134declare float @llvm.cos.f32(float) nounwind readnone
135
136;CHECK-LABEL: @cos_f64(
137;CHECK: llvm.cos.v4f64
138;CHECK: ret void
; Scalar loop storing llvm.cos.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.cos.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
159
160declare double @llvm.cos.f64(double) nounwind readnone
161
162;CHECK-LABEL: @exp_f32(
163;CHECK: llvm.exp.v4f32
164;CHECK: ret void
; Scalar loop storing llvm.exp.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.exp.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
185
186declare float @llvm.exp.f32(float) nounwind readnone
187
188;CHECK-LABEL: @exp_f64(
189;CHECK: llvm.exp.v4f64
190;CHECK: ret void
; Scalar loop storing llvm.exp.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.exp.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
211
212declare double @llvm.exp.f64(double) nounwind readnone
213
214;CHECK-LABEL: @exp2_f32(
215;CHECK: llvm.exp2.v4f32
216;CHECK: ret void
; Scalar loop storing llvm.exp2.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.exp2.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
237
238declare float @llvm.exp2.f32(float) nounwind readnone
239
240;CHECK-LABEL: @exp2_f64(
241;CHECK: llvm.exp2.v4f64
242;CHECK: ret void
; Scalar loop storing llvm.exp2.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.exp2.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
263
264declare double @llvm.exp2.f64(double) nounwind readnone
265
266;CHECK-LABEL: @log_f32(
267;CHECK: llvm.log.v4f32
268;CHECK: ret void
; Scalar loop storing llvm.log.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.log.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
289
290declare float @llvm.log.f32(float) nounwind readnone
291
292;CHECK-LABEL: @log_f64(
293;CHECK: llvm.log.v4f64
294;CHECK: ret void
; Scalar loop storing llvm.log.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.log.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
315
316declare double @llvm.log.f64(double) nounwind readnone
317
318;CHECK-LABEL: @log10_f32(
319;CHECK: llvm.log10.v4f32
320;CHECK: ret void
; Scalar loop storing llvm.log10.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.log10.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
341
342declare float @llvm.log10.f32(float) nounwind readnone
343
344;CHECK-LABEL: @log10_f64(
345;CHECK: llvm.log10.v4f64
346;CHECK: ret void
; Scalar loop storing llvm.log10.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.log10.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
367
368declare double @llvm.log10.f64(double) nounwind readnone
369
370;CHECK-LABEL: @log2_f32(
371;CHECK: llvm.log2.v4f32
372;CHECK: ret void
; Scalar loop storing llvm.log2.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.log2.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
393
394declare float @llvm.log2.f32(float) nounwind readnone
395
396;CHECK-LABEL: @log2_f64(
397;CHECK: llvm.log2.v4f64
398;CHECK: ret void
; Scalar loop storing llvm.log2.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.log2.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
419
420declare double @llvm.log2.f64(double) nounwind readnone
421
422;CHECK-LABEL: @fabs_f32(
423;CHECK: llvm.fabs.v4f32
424;CHECK: ret void
; Scalar loop storing llvm.fabs.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.fabs.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
445
446declare float @llvm.fabs.f32(float) nounwind readnone
447
;CHECK-LABEL: @fabs_f64(
;CHECK: llvm.fabs.v4f64
;CHECK: ret void
; Scalar loop storing fabs(y[i]) into x[i] on doubles for i in [0, n); the loop
; is bypassed when n <= 0. The FileCheck directives above were missing, which
; left this function unverified; they mirror the ones used for @fabs_f32.
; The intrinsic is called through its unsuffixed name @llvm.fabs, matching the
; declaration that follows this function.
; NOTE(review): presumably the IR reader remangles it to llvm.fabs.f64 - confirm.
define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 8
  %call = tail call double @llvm.fabs(double %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
  store double %call, double* %arrayidx2, align 8
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
468
469declare double @llvm.fabs(double) nounwind readnone
470
471;CHECK-LABEL: @copysign_f32(
472;CHECK: llvm.copysign.v4f32
473;CHECK: ret void
; Scalar loop storing llvm.copysign.f32(y[i], z[i]) into x[i] for i in [0, n);
; the loop is bypassed when n <= 0.
define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %mag.addr = getelementptr inbounds float, float* %y, i64 %i
  %sign.addr = getelementptr inbounds float, float* %z, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %mag.val = load float, float* %mag.addr, align 4
  %sign.val = load float, float* %sign.addr, align 4
  %res = tail call float @llvm.copysign.f32(float %mag.val, float %sign.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
496
497declare float @llvm.copysign.f32(float, float) nounwind readnone
498
;CHECK-LABEL: @copysign_f64(
;CHECK: llvm.copysign.v4f64
;CHECK: ret void
; Scalar loop storing copysign(y[i], z[i]) into x[i] on doubles for i in [0, n);
; the loop is bypassed when n <= 0. The FileCheck directives above were
; missing, which left this function unverified; they mirror the ones used for
; @copysign_f32.
; The intrinsic is called through its unsuffixed name @llvm.copysign, matching
; the declaration that follows this function.
; NOTE(review): presumably the IR reader remangles it to llvm.copysign.f64 - confirm.
define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds double, double* %z, i64 %indvars.iv
  ; The sign operand comes from z[i]. It was previously reloaded from
  ; %arrayidx (y[i]), leaving %arrayidx1 unused and both operands identical.
  %1 = load double, double* %arrayidx1, align 8
  %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
  store double %call, double* %arrayidx2, align 8
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
521
522declare double @llvm.copysign(double, double) nounwind readnone
523
524;CHECK-LABEL: @floor_f32(
525;CHECK: llvm.floor.v4f32
526;CHECK: ret void
; Scalar loop storing llvm.floor.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.floor.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
547
548declare float @llvm.floor.f32(float) nounwind readnone
549
550;CHECK-LABEL: @floor_f64(
551;CHECK: llvm.floor.v4f64
552;CHECK: ret void
; Scalar loop storing llvm.floor.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.floor.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
573
574declare double @llvm.floor.f64(double) nounwind readnone
575
576;CHECK-LABEL: @ceil_f32(
577;CHECK: llvm.ceil.v4f32
578;CHECK: ret void
; Scalar loop storing llvm.ceil.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.ceil.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
599
600declare float @llvm.ceil.f32(float) nounwind readnone
601
602;CHECK-LABEL: @ceil_f64(
603;CHECK: llvm.ceil.v4f64
604;CHECK: ret void
; Scalar loop storing llvm.ceil.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.ceil.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
625
626declare double @llvm.ceil.f64(double) nounwind readnone
627
628;CHECK-LABEL: @trunc_f32(
629;CHECK: llvm.trunc.v4f32
630;CHECK: ret void
; Scalar loop storing llvm.trunc.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.trunc.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
651
652declare float @llvm.trunc.f32(float) nounwind readnone
653
654;CHECK-LABEL: @trunc_f64(
655;CHECK: llvm.trunc.v4f64
656;CHECK: ret void
; Scalar loop storing llvm.trunc.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.trunc.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
677
678declare double @llvm.trunc.f64(double) nounwind readnone
679
680;CHECK-LABEL: @rint_f32(
681;CHECK: llvm.rint.v4f32
682;CHECK: ret void
; Scalar loop storing llvm.rint.f32(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.rint.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
703
704declare float @llvm.rint.f32(float) nounwind readnone
705
706;CHECK-LABEL: @rint_f64(
707;CHECK: llvm.rint.v4f64
708;CHECK: ret void
; Scalar loop storing llvm.rint.f64(y[i]) into x[i] for i in [0, n); the loop
; is bypassed when n <= 0.
define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds double, double* %y, i64 %i
  %dst.addr = getelementptr inbounds double, double* %x, i64 %i
  %src.val = load double, double* %src.addr, align 8
  %res = tail call double @llvm.rint.f64(double %src.val) nounwind readnone
  store double %res, double* %dst.addr, align 8
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
729
730declare double @llvm.rint.f64(double) nounwind readnone
731
732;CHECK-LABEL: @nearbyint_f32(
733;CHECK: llvm.nearbyint.v4f32
734;CHECK: ret void
; Scalar loop storing llvm.nearbyint.f32(y[i]) into x[i] for i in [0, n); the
; loop is bypassed when n <= 0.
define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; entered from entry and from its own back edge
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %i
  %dst.addr = getelementptr inbounds float, float* %x, i64 %i
  %src.val = load float, float* %src.addr, align 4
  %res = tail call float @llvm.nearbyint.f32(float %src.val) nounwind readnone
  store float %res, float* %dst.addr, align 4
  %i.next = add i64 %i, 1
  ; The 64-bit counter is truncated so it can be compared with the 32-bit bound.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
755
756declare float @llvm.nearbyint.f32(float) nounwind readnone
757
; x[i] = llvm.nearbyint.f64(y[i]) for i in [0, n); the loop runs only when n > 0.
; The scalar call is expected to become the <4 x double> intrinsic.
; CHECK-LABEL: @nearbyint_f64(
; CHECK: llvm.nearbyint.v4f64
; CHECK: ret void
define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %in = load double, double* %src, align 8
  %out = tail call double @llvm.nearbyint.f64(double %in) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %out, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare double @llvm.nearbyint.f64(double) nounwind readnone
783
; x[i] = llvm.round.f32(y[i]) for i in [0, n); the loop runs only when n > 0.
; The scalar call is expected to become the <4 x float> intrinsic.
; CHECK-LABEL: @round_f32(
; CHECK: llvm.round.v4f32
; CHECK: ret void
define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %in = load float, float* %src, align 4
  %out = tail call float @llvm.round.f32(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %out, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare float @llvm.round.f32(float) nounwind readnone
809
; x[i] = llvm.round.f64(y[i]) for i in [0, n); the loop runs only when n > 0.
; The scalar call is expected to become the <4 x double> intrinsic.
; CHECK-LABEL: @round_f64(
; CHECK: llvm.round.v4f64
; CHECK: ret void
define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %in = load double, double* %src, align 8
  %out = tail call double @llvm.round.f64(double %in) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %out, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare double @llvm.round.f64(double) nounwind readnone
835
; x[i] = llvm.fma.f32(y[i], z[i], w[i]) for i in [0, n); the loop runs only
; when n > 0. All four pointers are noalias. The three-operand scalar call is
; expected to become the <4 x float> intrinsic.
; CHECK-LABEL: @fma_f32(
; CHECK: llvm.fma.v4f32
; CHECK: ret void
define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %mul.lhs = load float, float* %y.addr, align 4
  %w.addr = getelementptr inbounds float, float* %w, i64 %i
  %addend = load float, float* %w.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %mul.rhs = load float, float* %z.addr, align 4
  %fused = tail call float @llvm.fma.f32(float %mul.lhs, float %mul.rhs, float %addend)
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %fused, float* %x.addr, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare float @llvm.fma.f32(float, float, float) nounwind readnone
865
; x[i] = llvm.fma.f64(y[i], z[i], w[i]) for i in [0, n); the loop runs only
; when n > 0. All four pointers are noalias. The three-operand scalar call is
; expected to become the <4 x double> intrinsic.
; CHECK-LABEL: @fma_f64(
; CHECK: llvm.fma.v4f64
; CHECK: ret void
define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds double, double* %y, i64 %i
  %mul.lhs = load double, double* %y.addr, align 8
  %w.addr = getelementptr inbounds double, double* %w, i64 %i
  %addend = load double, double* %w.addr, align 8
  %z.addr = getelementptr inbounds double, double* %z, i64 %i
  %mul.rhs = load double, double* %z.addr, align 8
  %fused = tail call double @llvm.fma.f64(double %mul.lhs, double %mul.rhs, double %addend)
  %x.addr = getelementptr inbounds double, double* %x, i64 %i
  store double %fused, double* %x.addr, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare double @llvm.fma.f64(double, double, double) nounwind readnone
895
; x[i] = llvm.fmuladd.f32(y[i], z[i], w[i]) for i in [0, n); the loop runs only
; when n > 0. All four pointers are noalias. The scalar call is expected to
; become the <4 x float> intrinsic.
; CHECK-LABEL: @fmuladd_f32(
; CHECK: llvm.fmuladd.v4f32
; CHECK: ret void
define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %mul.lhs = load float, float* %y.addr, align 4
  %w.addr = getelementptr inbounds float, float* %w, i64 %i
  %addend = load float, float* %w.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %mul.rhs = load float, float* %z.addr, align 4
  %muladd = tail call float @llvm.fmuladd.f32(float %mul.lhs, float %mul.rhs, float %addend)
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %muladd, float* %x.addr, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
925
; x[i] = llvm.fmuladd.f64(y[i], z[i], w[i]) for i in [0, n); the loop runs only
; when n > 0. All four pointers are noalias. The scalar call is expected to
; become the <4 x double> intrinsic.
; CHECK-LABEL: @fmuladd_f64(
; CHECK: llvm.fmuladd.v4f64
; CHECK: ret void
define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds double, double* %y, i64 %i
  %mul.lhs = load double, double* %y.addr, align 8
  %w.addr = getelementptr inbounds double, double* %w, i64 %i
  %addend = load double, double* %w.addr, align 8
  %z.addr = getelementptr inbounds double, double* %z, i64 %i
  %mul.rhs = load double, double* %z.addr, align 8
  %muladd = tail call double @llvm.fmuladd.f64(double %mul.lhs, double %mul.rhs, double %addend)
  %x.addr = getelementptr inbounds double, double* %x, i64 %i
  store double %muladd, double* %x.addr, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
955
; x[i] = llvm.pow.f32(y[i], z[i]) for i in [0, n); the loop runs only when
; n > 0. Both operands are loaded per iteration; the scalar call is expected to
; become the <4 x float> intrinsic.
; CHECK-LABEL: @pow_f32(
; CHECK: llvm.pow.v4f32
; CHECK: ret void
define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %base.addr = getelementptr inbounds float, float* %y, i64 %i
  %base = load float, float* %base.addr, align 4
  %exp.addr = getelementptr inbounds float, float* %z, i64 %i
  %exp = load float, float* %exp.addr, align 4
  %power = tail call float @llvm.pow.f32(float %base, float %exp) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %power, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}

declare float @llvm.pow.f32(float, float) nounwind readnone
983
; x[i] = llvm.pow.f64(y[i], z[i]) for i in [0, n); the loop runs only when
; n > 0. Both operands are loaded per iteration; the scalar call is expected to
; become the <4 x double> intrinsic.
; CHECK-LABEL: @pow_f64(
; CHECK: llvm.pow.v4f64
; CHECK: ret void
define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %base.addr = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %base.addr, align 8
  %exp.addr = getelementptr inbounds double, double* %z, i64 %i
  %exp = load double, double* %exp.addr, align 8
  %power = tail call double @llvm.pow.f64(double %base, double %exp) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %power, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1009
; In-place x[i] = fabsf(x[i]) over a fixed trip count: the loop exits when the
; truncated counter reaches 1024. The callee is the external @fabsf declaration
; rather than an intrinsic; the test requires the vectorized loop to call the
; <4 x float> llvm.fabs intrinsic instead.
;
; The label directive is anchored on "@fabs_libm(" like every other test in this
; file, so the checks below are confined to this function's output.
; CHECK-LABEL: @fabs_libm(
; CHECK:  call <4 x float> @llvm.fabs.v4f32
; CHECK: ret void
define void @fabs_libm(float* nocapture %x) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @fabsf(float %0) nounwind readnone
  ; The result is stored back to the address it was loaded from.
  store float %call, float* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare float @fabsf(float) nounwind readnone

; Declaration used by @pow_f64, which is defined earlier in this file.
declare double @llvm.pow.f64(double, double) nounwind readnone
1035
1036
1037
; Make sure we don't replace calls to functions that have a standard library
; function signature but are defined with internal linkage.

; A local definition that shares its name and signature with the C library's
; roundf; it always returns 0.0.
define internal float @roundf(float %x) nounwind readnone {
  ret float 0.00000000
}

; In-place x[i] = roundf(x[i]) over a fixed trip count of 1024, calling the
; internal definition above. The test requires that no <4 x float> load appears
; in this function. The label is anchored on "@internal_round(" and the negative
; check is closed by the function's return, so it cannot match text belonging to
; another function.
; CHECK-LABEL: @internal_round(
; CHECK-NOT:  load <4 x float>
; CHECK: ret void

define void @internal_round(float* nocapture %x) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @roundf(float %0) nounwind readnone
  store float %call, float* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
1065
; Make sure we don't replace calls to functions that have a standard library
; name but a different signature.

; Shares its name with the C library's round, but is declared as returning void.
declare void @round(double %f)

; Each iteration loads x[i], stores it back to the same address, and passes it
; to the void @round declared above; the trip count is fixed at 1024. The test
; requires that no <4 x double> load appears in this function. The label is
; anchored on "@wrong_signature(" and the negative check is closed by the
; function's return, so it cannot match text belonging to another function.
; CHECK-LABEL: @wrong_signature(
; CHECK-NOT:  load <4 x double>
; CHECK: ret void

define void @wrong_signature(double* nocapture %x) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 4
  store double %0, double* %arrayidx, align 4
  tail call void @round(double %0) nounwind readnone
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
1092
declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone

; x[i] = llvm.powi.f64(y[i], P) for i in [0, n); the loop runs only when n > 0.
; The exponent is the function argument %P, the same value on every iteration.
; The scalar call is expected to become the <4 x double> intrinsic.
; CHECK-LABEL: @powi_f64(
; CHECK: llvm.powi.v4f64
; CHECK: ret void
define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %src, align 8
  %power = tail call double @llvm.powi.f64(double %base, i32 %P) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %power, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1118
; Negative counterpart of @powi_f64: x[i] = llvm.powi.f64(y[i], trunc(i)).
; The exponent is derived from the induction variable, so it differs on every
; iteration; the test requires that no <4 x double> powi call is produced.
; CHECK-LABEL: @powi_f64_neg(
; CHECK-NOT: llvm.powi.v4f64
; CHECK: ret void
define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %src, align 8
  %exp = trunc i64 %i to i32
  %power = tail call double @llvm.powi.f64(double %base, i32 %exp) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %power, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1143
declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone

; x[i] = llvm.cttz.i64(y[i], true) for i in [0, n); the loop runs only when
; n > 0. Despite the "_f64" suffix the elements are i64. The second operand is
; the constant i1 true on every call. The scalar call is expected to become the
; <4 x i64> intrinsic.
; CHECK-LABEL: @cttz_f64(
; CHECK: llvm.cttz.v4i64
; CHECK: ret void
define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds i64, i64* %y, i64 %i
  %bits = load i64, i64* %src, align 8
  %count = tail call i64 @llvm.cttz.i64(i64 %bits, i1 true) nounwind readnone
  %dst = getelementptr inbounds i64, i64* %x, i64 %i
  store i64 %count, i64* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1169
declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone

; x[i] = llvm.ctlz.i64(y[i], true) for i in [0, n); the loop runs only when
; n > 0. Despite the "_f64" suffix the elements are i64. The second operand is
; the constant i1 true on every call. The scalar call is expected to become the
; <4 x i64> intrinsic.
; CHECK-LABEL: @ctlz_f64(
; CHECK: llvm.ctlz.v4i64
; CHECK: ret void
define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %src = getelementptr inbounds i64, i64* %y, i64 %i
  %bits = load i64, i64* %src, align 8
  %count = tail call i64 @llvm.ctlz.i64(i64 %bits, i1 true) nounwind readnone
  %dst = getelementptr inbounds i64, i64* %x, i64 %i
  store i64 %count, i64* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1195
declare float @llvm.minnum.f32(float, float) nounwind readnone

; x[i] = llvm.minnum.f32(y[i], z[i]) for i in [0, n); the loop runs only when
; n > 0. The scalar call is expected to become the <4 x float> intrinsic.
; CHECK-LABEL: @minnum_f32(
; CHECK: llvm.minnum.v4f32
; CHECK: ret void
define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %lhs.addr = getelementptr inbounds float, float* %y, i64 %i
  %lhs = load float, float* %lhs.addr, align 4
  %rhs.addr = getelementptr inbounds float, float* %z, i64 %i
  %rhs = load float, float* %rhs.addr, align 4
  %smaller = tail call float @llvm.minnum.f32(float %lhs, float %rhs) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %smaller, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1223
declare float @llvm.maxnum.f32(float, float) nounwind readnone

; x[i] = llvm.maxnum.f32(y[i], z[i]) for i in [0, n); the loop runs only when
; n > 0. The scalar call is expected to become the <4 x float> intrinsic.
; CHECK-LABEL: @maxnum_f32(
; CHECK: llvm.maxnum.v4f32
; CHECK: ret void
define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
start:
  %nonempty = icmp sgt i32 %n, 0
  br i1 %nonempty, label %loop, label %done

loop:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop ]
  %lhs.addr = getelementptr inbounds float, float* %y, i64 %i
  %lhs = load float, float* %lhs.addr, align 4
  %rhs.addr = getelementptr inbounds float, float* %z, i64 %i
  %rhs = load float, float* %rhs.addr, align 4
  %larger = tail call float @llvm.maxnum.f32(float %lhs, float %rhs) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %larger, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test compares the 32-bit truncation of the 64-bit counter with n.
  %i.next.i32 = trunc i64 %i.next to i32
  %finished = icmp eq i32 %i.next.i32, %n
  br i1 %finished, label %done, label %loop

done:
  ret void
}
1251