; REQUIRES: asserts
; RUN: opt -loop-vectorize -S -mattr=avx512bw --debug-only=loop-vectorize < %s 2>&1| FileCheck %s

; Cost-model test: each function below copies every Nth i16 element of @A into
; consecutive elements of @B. The loop vectorizer's debug output is piped to
; FileCheck, which pins the estimated cost of the strided load at every
; vectorization factor the test lists.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Source array; read with a constant stride (2, 3, 4 or 5 elements).
@A = global [10240 x i16] zeroinitializer, align 16
; Destination array; written with unit stride.
@B = global [10240 x i16] zeroinitializer, align 16

; load_i16_stride2: for i in [0, 1024), B[i] = A[2 * i].
; The FileCheck lines inside the function pin the vectorizer's estimated cost
; of the stride-2 i16 load (%1) for VF 1, 2, 4, 8, 16 and 32.
define void @load_i16_stride2() {
;CHECK-LABEL: load_i16_stride2
;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
;CHECK: Found an estimated cost of 1 for VF 4 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 16 For instruction:   %1 = load
;CHECK: Found an estimated cost of 3 for VF 32 For instruction:   %1 = load
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %indvars.iv is the loop counter i, starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Source index: i << 1 == 2 * i.
  %0 = shl nsw i64 %indvars.iv, 1
  %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
  ; The strided load whose cost is being checked.
  %1 = load i16, i16* %arrayidx, align 4
  ; Destination index is i itself (unit stride).
  %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
  store i16 %1, i16* %arrayidx2, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once i + 1 reaches 1024.
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; load_i16_stride3: for i in [0, 1024), B[i] = A[3 * i].
; The FileCheck lines inside the function pin the vectorizer's estimated cost
; of the stride-3 i16 load (%1) for VF 1, 2, 4, 8, 16 and 32.
define void @load_i16_stride3() {
;CHECK-LABEL: load_i16_stride3
;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
;CHECK: Found an estimated cost of 3 for VF 16 For instruction:   %1 = load
;CHECK: Found an estimated cost of 5 for VF 32 For instruction:   %1 = load
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %indvars.iv is the loop counter i, starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Source index: 3 * i.
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
  ; The strided load whose cost is being checked.
  ; NOTE(review): the address is @A + 6*i bytes, which is only 2-byte aligned
  ; for odd i, so 'align 4' overstates the alignment. Left as-is because the
  ; expected costs above were presumably recorded with this exact IR; confirm
  ; they are unaffected before lowering it to 'align 2'.
  %1 = load i16, i16* %arrayidx, align 4
  ; Destination index is i itself (unit stride).
  %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
  store i16 %1, i16* %arrayidx2, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once i + 1 reaches 1024.
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; load_i16_stride4: for i in [0, 1024), B[i] = A[4 * i].
; The FileCheck lines inside the function pin the vectorizer's estimated cost
; of the stride-4 i16 load (%1) for VF 1, 2, 4, 8, 16 and 32.
define void @load_i16_stride4() {
;CHECK-LABEL: load_i16_stride4
;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
;CHECK: Found an estimated cost of 1 for VF 2 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load
;CHECK: Found an estimated cost of 3 for VF 16 For instruction:   %1 = load
;CHECK: Found an estimated cost of 8 for VF 32 For instruction:   %1 = load
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %indvars.iv is the loop counter i, starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Source index: i << 2 == 4 * i.
  %0 = shl nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
  ; The strided load whose cost is being checked.
  %1 = load i16, i16* %arrayidx, align 4
  ; Destination index is i itself (unit stride).
  %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
  store i16 %1, i16* %arrayidx2, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once i + 1 reaches 1024.
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; load_i16_stride5: for i in [0, 1024), B[i] = A[5 * i].
; The FileCheck lines inside the function pin the vectorizer's estimated cost
; of the stride-5 i16 load (%1) for VF 1, 2, 4, 8, 16 and 32.
define void @load_i16_stride5() {
;CHECK-LABEL: load_i16_stride5
;CHECK: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 2 For instruction:   %1 = load
;CHECK: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load
;CHECK: Found an estimated cost of 3 for VF 8 For instruction:   %1 = load
;CHECK: Found an estimated cost of 5 for VF 16 For instruction:   %1 = load
;CHECK: Found an estimated cost of 10 for VF 32 For instruction:   %1 = load
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %indvars.iv is the loop counter i, starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Source index: 5 * i.
  %0 = mul nsw i64 %indvars.iv, 5
  %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
  ; The strided load whose cost is being checked.
  ; NOTE(review): the address is @A + 10*i bytes, which is only 2-byte aligned
  ; for odd i, so 'align 4' overstates the alignment. Left as-is because the
  ; expected costs above were presumably recorded with this exact IR; confirm
  ; they are unaffected before lowering it to 'align 2'.
  %1 = load i16, i16* %arrayidx, align 4
  ; Destination index is i itself (unit stride).
  %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
  store i16 %1, i16* %arrayidx2, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once i + 1 reaches 1024.
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
