; Runs separate-const-offset-from-gep, slsr and gvn (in that order) over this
; module for the amdgcn-- triple and verifies the rewritten address
; computations with FileCheck. The first invocation names the passes as
; individual flags; the second passes the same pipeline via -passes=.
; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes="separate-const-offset-from-gep,slsr,gvn" < %s | FileCheck %s

; Pointers are 64 bits wide in addrspace(1) and 32 bits wide in addrspace(3),
; which is why the tests below index the former with i64 and the latter with i32.
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"


; Two addrspace(1) loads from %arr at element indices (%i + 1023) and
; ((%i << 1) + 1023). The expectations below require the addresses to be
; rebuilt as a chain: one GEP off %arr by some index, then a second GEP off
; the first result by that same index.
; NOTE(review): 1023 floats is 4092 bytes; going by the function name this is
; presumably the largest float-aligned offset the MUBUF immediate field can
; hold - confirm against the target description.
; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
bb:
  ; %i2 = 2 * %i, used as the variable part of the second index.
  %i2 = shl nsw i32 %i, 1
  ; First access: load arr[%i + 1023] as i32 and store it to %out.
  %j1 = add nsw i32 %i, 1023
  %tmp = sext i32 %j1 to i64
  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v11, i32 addrspace(1)* %tmp4, align 4

  ; Second access: load arr[%i2 + 1023] as i32 and store it to %out.
  %j2 = add nsw i32 %i2, 1023
  %tmp5 = sext i32 %j2 to i64
  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v22, i32 addrspace(1)* %tmp7, align 4

  ret void
}

; Same shape as the 1023-offset global test, but the constant is 1024. The
; expectations below require the original add, sext and both inbounds GEPs
; off %arr (indexed by %tmp and %tmp5) to still be present in the output,
; i.e. the constant is not split out of the index here.
; NOTE(review): 1024 floats is 4096 bytes; going by the function name this is
; presumably one step past what the MUBUF immediate field can hold - confirm
; against the target description.
; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
; CHECK: %j1 = add nsw i32 %i, 1024
; CHECK: %tmp = sext i32 %j1 to i64
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
bb:
  ; %i2 = 2 * %i, used as the variable part of the second index.
  %i2 = shl nsw i32 %i, 1
  ; First access: load arr[%i + 1024] as i32 and store it to %out.
  %j1 = add nsw i32 %i, 1024
  %tmp = sext i32 %j1 to i64
  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v11, i32 addrspace(1)* %tmp4, align 4

  ; Second access: load arr[%i2 + 1024] as i32 and store it to %out.
  %j2 = add nsw i32 %i2, 1024
  %tmp5 = sext i32 %j2 to i64
  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v22, i32 addrspace(1)* %tmp7, align 4

  ret void
}

; Two addrspace(3) loads from %arr at element indices (%i + 16383) and
; ((%i << 1) + 16383). The expectations below require the constant to be
; peeled into a trailing inbounds GEP of 16383, with the variable parts
; chained: B1 = %arr + %i, then B2 = B1 + %i.
; NOTE(review): 16383 floats is 65532 bytes; going by the function name this
; is presumably the largest float-aligned offset a DS instruction can encode
; - confirm against the target description.
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383

; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
  ; %i2 = 2 * %i, used as the variable part of the second index.
  %i2 = shl nsw i32 %i, 1
  ; First access: load arr[%i + 16383] as i32 and store it to %out.
  %j1 = add nsw i32 %i, 16383
  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v11, i32 addrspace(1)* %tmp4, align 4

  ; Second access: load arr[%i2 + 16383] as i32 and store it to %out.
  %j2 = add nsw i32 %i2, 16383
  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v22, i32 addrspace(1)* %tmp7, align 4

  ret void
}

; Same shape as the 16383-offset addrspace(3) test, but the constant is 16384.
; The expectations below require both GEPs to keep indexing %arr directly
; (by %j1 and %j2); only the integer index is strength-reduced, with %j2
; recomputed as %j1 + %i.
; NOTE(review): 16384 floats is 65536 bytes; going by the function name this
; is presumably one step past what a DS instruction offset can encode -
; confirm against the target description.
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
; CHECK: %j1 = add nsw i32 %i, 16384
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
; CHECK: %j2 = add i32 %j1, %i
; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
  ; %i2 = 2 * %i, used as the variable part of the second index.
  %i2 = shl nsw i32 %i, 1
  ; First access: load arr[%i + 16384] as i32 and store it to %out.
  %j1 = add nsw i32 %i, 16384
  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v11, i32 addrspace(1)* %tmp4, align 4

  ; Second access: load arr[%i2 + 16384] as i32 and store it to %out.
  %j2 = add nsw i32 %i2, 16384
  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
  store i32 %v22, i32 addrspace(1)* %tmp7, align 4

  ret void
}
