; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s

; Tests for the load/store vectorizer on AMDGPU where the 64-bit GEP index is
; produced by extending (sext/zext) a 32-bit value. Each function performs two
; loads from %c and two stores to %a at adjacent element indices, and the
; checks expect each pair to be merged into a single <2 x ...> access.

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

declare i32 @llvm.amdgcn.workitem.id.x() #1

; The base index is a sign-extended workitem id; the second element is
; addressed with a constant +1 GEP on top of the first element's pointer.

; CHECK-LABEL: @basic_merge_sext_index(
; CHECK: sext i32 %id.x to i64
; CHECK: load <2 x float>
; CHECK: store <2 x float> zeroinitializer
define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
entry:
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %sext.id.x = sext i32 %id.x to i64
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4

  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4

  ; Use both loaded values so neither load is dead.
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  ret void
}

; Same shape as the sext test above, but the workitem id is zero-extended.

; CHECK-LABEL: @basic_merge_zext_index(
; CHECK: zext i32 %id.x to i64
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
entry:
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %zext.id.x = zext i32 %id.x to i64
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4

  ; Use both loaded values so neither load is dead.
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  ret void
}

; The two indices are computed in 32 bits (%shl and %shl | 1) and zero-extended
; separately, so adjacency is only visible by looking through the extensions.
; The loads and stores are interleaved, and all pointer arguments are noalias.

; CHECK-LABEL: @merge_op_zext_index(
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
entry:
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %shl = shl i32 %id.x, 2
  %zext.id.x = zext i32 %shl to i64
  %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
  %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x

  ; The shl leaves the low two bits clear, so this 'or' yields %shl + 1.
  %id.x.1 = or i32 %shl, 1
  %id.x.1.ext = zext i32 %id.x.1 to i64

  %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
  %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext

  %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
  store float 0.0, float addrspace(1)* %a.0, align 4
  %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
  store float 0.0, float addrspace(1)* %a.1, align 4

  %add = fadd float %ld.c.0, %ld.c.1
  store float %add, float addrspace(1)* %b, align 4
  ret void
}

; Sign-extending counterpart of @merge_op_zext_index: both 32-bit indices
; (%shl and %shl | 1) are sign-extended separately before being used in GEPs.

; CHECK-LABEL: @merge_op_sext_index(
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
entry:
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %shl = shl i32 %id.x, 2
  %sext.id.x = sext i32 %shl to i64
  %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x
  %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x

  ; The shl leaves the low two bits clear, so this 'or' yields %shl + 1.
  %id.x.1 = or i32 %shl, 1
  %id.x.1.ext = sext i32 %id.x.1 to i64

  %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
  %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext

  %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
  store float 0.0, float addrspace(1)* %a.0, align 4
  %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
  store float 0.0, float addrspace(1)* %a.1, align 4

  %add = fadd float %ld.c.0, %ld.c.1
  store float %add, float addrspace(1)* %b, align 4
  ret void
}

; This case fails to vectorize unless the extra extension handling in
; isConsecutiveAccess is used.
;
; The index is derived from a 64-bit induction phi that is truncated to i32,
; shifted left by 4, and then zero-extended back to i64. The %arst and %aoeu
; arguments are not referenced in the body.

; CHECK-LABEL: @zext_trunc_phi_1(
; CHECK: loop:
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 {
entry:
  %cmp0 = icmp eq i32 %n, 0
  br i1 %cmp0, label %exit, label %loop

loop:
  %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
  %trunc.iv = trunc i64 %indvars.iv to i32
  %idx = shl i32 %trunc.iv, 4

  %idx.ext = zext i32 %idx to i64
  %c.0 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.ext
  %a.0 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext

  ; The shl leaves the low four bits clear, so this 'or' yields %idx + 1.
  %idx.1 = or i32 %idx, 1
  %idx.1.ext = zext i32 %idx.1 to i64
  %c.1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.1.ext
  %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.1.ext

  ; Copy two adjacent elements from %c to %a, with loads and stores interleaved.
  %ld.c.0 = load i32, i32 addrspace(1)* %c.0, align 4
  store i32 %ld.c.0, i32 addrspace(1)* %a.0, align 4
  %ld.c.1 = load i32, i32 addrspace(1)* %c.1, align 4
  store i32 %ld.c.1, i32 addrspace(1)* %a.1, align 4

  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32

  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }