1; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
2
; Fixed-size arrays in addrspace(3). Byte sizes implied by the types:
;   @lds0: 512  x float = 2048  (0x800)
;   @lds1: 256  x float = 1024  (0x400)
;   @lds2: 4096 x float = 16384 (0x4000)
;   @lds3: 67   x i8    = 67    (deliberately not a multiple of 4, 8 or 16)
3@lds0 = addrspace(3) global [512 x float] undef
4@lds1 = addrspace(3) global [256 x float] undef
5@lds2 = addrspace(3) global [4096 x float] undef
6@lds3 = addrspace(3) global [67 x i8] undef
7
; External zero-length arrays in addrspace(3). They differ only in element
; type and explicit alignment: float / double with no explicit alignment,
; double with align 4, and double with align 16.
8@dynamic_shared0 = external addrspace(3) global [0 x float]
9@dynamic_shared1 = external addrspace(3) global [0 x double]
10@dynamic_shared2 = external addrspace(3) global [0 x double], align 4
11@dynamic_shared3 = external addrspace(3) global [0 x double], align 16
12
; Copies @lds0[%tid.x] into @dynamic_shared0[%tid.x].
; @lds0 is the only fixed-size array referenced here; its size is
; 512 * 4 = 2048 = 0x800 bytes, which matches the constant the pattern below
; expects to be added when forming the store address.
; NOTE(review): %out is not used in the body.
13; CHECK-LABEL: {{^}}dynamic_shared_array_0:
14; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
15define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) {
16  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Load one float from the fixed-size array at index %tid.x.
17  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x
18  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  ; Store it at the same index of the external zero-length array.
19  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
20  store float %val0, float addrspace(3)* %arrayidx1, align 4
21  ret void
22}
23
; Branches on %cond: the %if block loads from @lds0 and the %else block loads
; from @lds1, both at index %tid.x + 64; the chosen value is then stored to
; @dynamic_shared0[%tid.x].
; Both fixed-size arrays are referenced, and 2048 + 1024 = 3072 = 0xc00 bytes
; matches the constant expected in the final add.
; NOTE(review): three shifts by 2 are expected, presumably one per float
; address computation (%if, %else, %endif) -- confirm against llc output.
; NOTE(review): %out is not used in the body.
24; CHECK-LABEL: {{^}}dynamic_shared_array_1:
25; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
26; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
27; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
28; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]]
29define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) {
30entry:
31  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Index shared by both loads; the final store uses plain %tid.x instead.
32  %idx.0 = add nsw i32 %tid.x, 64
  ; %cond == 0 selects the %if block, anything else selects %else.
33  %tmp = icmp eq i32 %cond, 0
34  br i1 %tmp, label %if, label %else
35
36if:                                               ; preds = %entry
37  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
38  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
39  br label %endif
40
41else:                                             ; preds = %entry
42  %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
43  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
44  br label %endif
45
46endif:                                            ; preds = %else, %if
  ; Merge the value loaded on whichever path was taken.
47  %val = phi float [ %val0, %if ], [ %val1, %else ]
48  %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
49  store float %val, float addrspace(3)* %arrayidx, align 4
50  ret void
51}
52
; Copies @lds2[%tid.x + %idx] into @dynamic_shared0[%tid.x].
; @lds2 is the only fixed-size array referenced; 4096 * 4 = 16384 = 0x4000
; bytes matches the constant expected to be added to the shifted index.
53; CHECK-LABEL: {{^}}dynamic_shared_array_2:
54; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
55; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x4000, [[IDX]]
56define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
57  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; The load index is offset by the kernel argument; the store index is not.
58  %vidx = add i32 %tid.x, %idx
59  %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx
60  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
61  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
62  store float %val0, float addrspace(3)* %arrayidx1, align 4
63  ret void
64}
65
; Loads a byte from @lds3[%tid.x + %idx], converts it to float and stores it
; to @dynamic_shared0[%tid.x].
; @lds3 is 67 bytes; the expected constant 0x44 = 68 is 67 rounded up to the
; next multiple of 4, the size of the float element of @dynamic_shared0.
66; The offset to the dynamic shared memory array should be aligned on the type
67; specified.
68; CHECK-LABEL: {{^}}dynamic_shared_array_3:
69; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
70; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
71define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
72  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
73  %vidx = add i32 %tid.x, %idx
74  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  ; NOTE(review): the i8 load claims align 4 although %vidx is variable --
  ; confirm this is intentional for the test.
75  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  ; Unsigned byte -> float conversion of the loaded value.
76  %val1 = uitofp i8 %val0 to float
77  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
78  store float %val1, float addrspace(3)* %arrayidx1, align 4
79  ret void
80}
81
; Loads a byte from @lds3[%tid.x + %idx] and stores it both as a float to
; @dynamic_shared0[%tid.x] and as a double to @dynamic_shared1[%tid.x].
; Two external arrays are referenced (float and double, neither with an
; explicit alignment); the expected constant 0x48 = 72 is 67 rounded up to the
; next multiple of 8. Here the constant is expected in a v_mov_b32 whose
; result register feeds the add, rather than as an immediate of the add.
82; The offset to the dynamic shared memory array should be aligned on the
83; maximal one.
84; CHECK-LABEL: {{^}}dynamic_shared_array_4:
85; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
86; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
87; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
88define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
89  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
90  %vidx = add i32 %tid.x, %idx
91  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
92  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  ; The same byte is converted twice, once per destination element type.
93  %val1 = uitofp i8 %val0 to float
94  %val2 = uitofp i8 %val0 to double
95  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
96  store float %val1, float addrspace(3)* %arrayidx1, align 4
97  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x
  ; The double store itself only claims 4-byte alignment.
98  store double %val2, double addrspace(3)* %arrayidx2, align 4
99  ret void
100}
101
; Same body as dynamic_shared_array_4, except the double store targets
; @dynamic_shared2, which is declared with an explicit align 4.
; The expected constant drops back to 0x44 = 68, i.e. 67 rounded up to the
; next multiple of 4 rather than 8.
102; Honor the explicit alignment from the specified variable.
103; CHECK-LABEL: {{^}}dynamic_shared_array_5:
104; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
105; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
106; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
107define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
108  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
109  %vidx = add i32 %tid.x, %idx
110  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
111  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
112  %val1 = uitofp i8 %val0 to float
113  %val2 = uitofp i8 %val0 to double
114  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
115  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ; Destination is the double array declared with align 4.
116  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x
117  store double %val2, double addrspace(3)* %arrayidx2, align 4
118  ret void
119}
120
; Same body as dynamic_shared_array_4, except the double store targets
; @dynamic_shared3, which is declared with an explicit align 16.
; The expected constant becomes 0x50 = 80, i.e. 67 rounded up to the next
; multiple of 16.
121; Honor the explicit alignment from the specified variable.
122; CHECK-LABEL: {{^}}dynamic_shared_array_6:
123; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
124; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
125; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
126define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
127  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
128  %vidx = add i32 %tid.x, %idx
129  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
130  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
131  %val1 = uitofp i8 %val0 to float
132  %val2 = uitofp i8 %val0 to double
133  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
134  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ; Destination is the double array declared with align 16; the store itself
  ; still only claims align 4.
135  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x
136  store double %val2, double addrspace(3)* %arrayidx2, align 4
137  ret void
138}
139
; Intrinsic declaration. Every kernel in this file calls it once and uses the
; returned i32 (%tid.x) as the element index into the addrspace(3) arrays.
140declare i32 @llvm.amdgcn.workitem.id.x()
141