; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Vectorized subsets of the load/store chains in the presence of
; interleaved loads/stores.
;
; Every function in this file issues scalar, 4-byte-aligned i32 loads and
; stores through constant-offset GEPs off a single noalias pointer. The
; FileCheck directives placed before each function list the vector and scalar
; accesses expected in the pass output, in order.

; Input access order (all scalar i32, align 4):
;   load  ptr[1], load  ptr[0]    - two adjacent elements, higher index first
;   store ptr[1], store ptr[0]    - zero written to the same two elements
;   load  ptr[1], load  ptr[2]    - two adjacent elements after the stores
; Expected output, in order: a <2 x i32> load, a scalar load, a <2 x i32>
; store, and another scalar load.
; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
define void @interleave_2L_2S(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 relative to %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The loaded values are never used; only the order of memory accesses
  ; matters to this test.
  %l1 = load i32, i32* %next.gep1, align 4
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; Input access order (all scalar i32, align 4):
;   load  ptr[0], load  ptr[1]    - two adjacent elements, lower index first
;   store ptr[1], store ptr[0]    - zero written to the same two elements
;   load  ptr[1], load  ptr[2]    - two adjacent elements after the stores
; Expected output, in order: a <3 x i32> load, a <2 x i32> store, and one
; scalar load. Note that the input itself contains 2 loads, 2 stores, 2 loads;
; the counts in the function name match the expected output widths instead.
; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <3 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32

define void @interleave_3L_2S_1L(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 relative to %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The loaded values are never used; only the order of memory accesses
  ; matters to this test.
  %l2 = load i32, i32* %next.gep, align 4
  %l1 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; Input access order (all scalar i32, align 4):
;   load  ptr[0]                  - a single load before the stores
;   store ptr[1], store ptr[0]    - zero written to two adjacent elements
;   load  ptr[1], load  ptr[2]    - two adjacent elements after the stores
; Expected output, in order: a scalar load, a <2 x i32> store, and a
; <2 x i32> load.
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
  ; Addresses of elements 0, 1 and 2 relative to %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  ; The loaded values are never used; only the order of memory accesses
  ; matters to this test.
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}


; Input access order (all scalar i32, align 4):
;   load  ptr[0], load  ptr[1]                - two adjacent elements
;   store ptr[1], store ptr[2]                - zero written to two adjacent elements
;   load  ptr[1], load  ptr[2], load ptr[3]   - three adjacent elements
; Expected output, in order: a <2 x i32> load, a <2 x i32> store, and a
; <3 x i32> load.
; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
  ; Addresses of elements 0 through 3 relative to %ptr.
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
  %next.gep3 = getelementptr i32, i32* %ptr, i64 3

  ; The loaded values are never used; only the order of memory accesses
  ; matters to this test.
  %l1 = load i32, i32* %next.gep, align 4
  %l2 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep2, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4
  %l5 = load i32, i32* %next.gep3, align 4

  ret void
}
