; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Vectorized subsets of the load/store chains in the presence of
; interleaved loads/stores
;
; Every function below takes one noalias i32 pointer, forms GEPs at small
; constant element offsets from it, and issues 4-byte-aligned i32 loads and
; stores through those GEPs. The loaded values are never used and every store
; writes the constant 0; only the order and addresses of the memory accesses
; differ between the functions.

; Accesses, in program order: load [1], load [0], store [1], store [0],
; load [1], load [2].
; Expected output: a <2 x i32> load, a scalar load, a <2 x i32> store, and a
; final scalar load.
; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load i32
define void @interleave_2L_2S(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l1 = load i32, i32* %next.gep1, align 4
  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; Accesses, in program order: load [0], load [1], store [1], store [0],
; load [1], load [2]. Same as @interleave_2L_2S except that the first two
; loads are issued in ascending address order.
; Expected output: a <3 x i32> load, a <2 x i32> store, and a scalar load.
; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <3 x i32>
; CHECK: store <2 x i32>
; CHECK: load i32

define void @interleave_3L_2S_1L(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l2 = load i32, i32* %next.gep, align 4
  %l1 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}

; Accesses, in program order: load [0], store [1], store [0], load [1],
; load [2].
; Expected output: a scalar load, a <2 x i32> store, and a <2 x i32> load.
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @chain_suffix(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2

  %l2 = load i32, i32* %next.gep, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4

  ret void
}


; Accesses, in program order: load [0], load [1], store [1], store [2],
; load [1], load [2], load [3].
; Expected output: a <2 x i32> load, a <2 x i32> store, and a <3 x i32> load.
; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(i32* noalias %ptr) {
  %next.gep = getelementptr i32, i32* %ptr, i64 0
  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
  %next.gep3 = getelementptr i32, i32* %ptr, i64 3

  %l1 = load i32, i32* %next.gep, align 4
  %l2 = load i32, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep1, align 4
  store i32 0, i32* %next.gep2, align 4
  %l3 = load i32, i32* %next.gep1, align 4
  %l4 = load i32, i32* %next.gep2, align 4
  %l5 = load i32, i32* %next.gep3, align 4

  ret void
}

; FIXME: If the chain is too long and TLI says misaligned is not fast,
; then LSV fails to vectorize anything in that chain.
; To reproduce below, add a tmp5 (ptr+4) and load tmp5 into l6 and l7.

; Accesses, in program order: load [1], load [0], store [1], store [0],
; load [1], load [2], then three loads of [3].
; Expected output: a <3 x i32> load, a scalar load, a <2 x i32> store of
; zeroinitializer, and three further scalar loads.
; CHECK-LABEL: @interleave_get_longest(
; CHECK: load <3 x i32>
; CHECK: load i32
; CHECK: store <2 x i32> zeroinitializer
; CHECK: load i32
; CHECK: load i32
; CHECK: load i32

define void @interleave_get_longest(i32* noalias %ptr) {
  %tmp1 = getelementptr i32, i32* %ptr, i64 0
  %tmp2 = getelementptr i32, i32* %ptr, i64 1
  %tmp3 = getelementptr i32, i32* %ptr, i64 2
  %tmp4 = getelementptr i32, i32* %ptr, i64 3

  %l1 = load i32, i32* %tmp2, align 4
  %l2 = load i32, i32* %tmp1, align 4
  store i32 0, i32* %tmp2, align 4
  store i32 0, i32* %tmp1, align 4
  %l3 = load i32, i32* %tmp2, align 4
  %l4 = load i32, i32* %tmp3, align 4
  %l5 = load i32, i32* %tmp4, align 4
  %l6 = load i32, i32* %tmp4, align 4
  %l7 = load i32, i32* %tmp4, align 4

  ret void
}