; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s

; Test that loops with different maximum offsets for different address
; spaces are correctly handled.
;
; Every function below has the same shape: a counted loop that loads an i8
; from %arg1 at (induction variable + constant), sign-extends it, and adds it
; into the i32 element of %arg0 indexed by the induction variable. Only the
; address space of %arg1 and the constant differ between the functions.

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

; addrspace(1) source pointer, constant offset 4095.
; The directives expect the loop's pointer induction variable to start at
; %arg1 and the 4095 offset to stay inside the loop as a GEP feeding the load.
; NOTE(review): going by the test name, 4095 is presumably the largest offset
; the target folds for this address space - confirm against the backend.

; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader:                                 ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit:                             ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 4095
  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = add nsw i32 %tmp6, %tmp4
  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}

; addrspace(1) source pointer, constant offset 4096 (one more than the
; previous test). The directives expect the 4096 offset to be applied once in
; the preheader, with the loop advancing the resulting pointer by 1 per
; iteration.

; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
; OPT: {{^}}.lr.ph.preheader:
; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
; OPT: br label %.lr.ph

; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader:                                 ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit:                             ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 4096
  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = add nsw i32 %tmp6, %tmp4
  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}

; addrspace(3) source pointer (32-bit per the datalayout), constant offset
; 65535; the i64 index is truncated to i32 before the GEP. The directives
; expect the 65535 offset to stay inside the loop as a GEP feeding the load.
; The label pattern ends in ':' so that it matches the loop block's own label
; and not a longer label that merely begins with the same text (such as the
; preheader's).
; NOTE(review): going by the test name, 65535 is presumably the largest offset
; the target folds for this address space - confirm against the backend.

; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader:                                 ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit:                             ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 65535
  %tmp2 = trunc i64 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = add nsw i32 %tmp7, %tmp5
  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}

; addrspace(3) source pointer, constant offset 65536 (one more than the
; previous test). The directives expect the 65536 offset to be applied once in
; the preheader, with the loop advancing the resulting pointer by 1 per
; iteration.

; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
; OPT: {{^}}.lr.ph.preheader:
; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
; OPT: br label %.lr.ph

; OPT: {{^}}.lr.ph:
; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
bb:
  %tmp = icmp sgt i32 %n, 0
  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader:                                 ; preds = %bb
  br label %.lr.ph

._crit_edge.loopexit:                             ; preds = %.lr.ph
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
  ret void

.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %tmp1 = add nuw nsw i64 %indvars.iv, 65536
  %tmp2 = trunc i64 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = add nsw i32 %tmp7, %tmp5
  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }