; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s
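
; With -amdgpu-codegenprepare-widen-constant-loads, AMDGPUCodeGenPrepare widens
; sub-dword scalar loads from the constant address space (4) into i32 loads when
; they are aligned to at least 4 bytes, truncating the result back to the
; original type. Under-aligned, volatile, and non-constant address space loads
; must be left untouched.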

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

; OPT-LABEL: @constant_load_i1(
; OPT: load i1
; OPT-NEXT: store i1
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in
  store i1 %val, i1 addrspace(1)* %out
  ret void
}

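; Loads with alignment below 4 bytes cannot be widened to a dword load.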
; OPT-LABEL: @constant_load_i1_align2
; OPT: load i1
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in, align 2
  store i1 %val, i1 addrspace(1)* %out, align 2
  ret void
}

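; At alignment 4 the pass bitcasts the pointer to i32 addrspace(4)*, loads a
; full dword, and truncates back to the original type.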
; OPT-LABEL: @constant_load_i1_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in, align 4
  store i1 %val, i1 addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_i8(
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i8_align2
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in, align 2
  store i8 %val, i8 addrspace(1)* %out, align 2
  ret void
}

; OPT-LABEL: @constant_load_i8_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in, align 4
  store i8 %val, i8 addrspace(1)* %out, align 4
  ret void
}
; OPT-LABEL: @constant_load_v2i8(
; OPT: load <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2i8_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: bitcast
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
  ret void
}

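; <3 x i8> occupies a full dword and its default ABI alignment is 4 bytes, so
; the load is widened even without an explicit alignment: load i32, trunc to
; i24, bitcast back to <3 x i8>.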
; OPT-LABEL: @constant_load_v3i8(
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v3i8_align4
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_i16(
; OPT: load i16
; OPT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4(
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_f16
; OPT: load half
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
  %ld = load half, half addrspace(4)* %in
  store half %ld, half addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2f16
; OPT: load <2 x half>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
  %ld = load <2 x half>, <2 x half> addrspace(4)* %in
  store <2 x half> %ld, <2 x half> addrspace(1)* %out
  ret void
}

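; Volatile loads must not be widened.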
; OPT-LABEL: @load_volatile
; OPT: load volatile i16
; OPT-NEXT: store
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
  %a = load volatile i16, i16 addrspace(4)* %in
  store i16 %a, i16 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2i8_volatile
; OPT: load volatile <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

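; Loads from the global address space (1) are not constant loads and are left
; alone.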
; OPT-LABEL: @constant_load_v2i8_addrspace1
; OPT: load <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @use_dispatch_ptr
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: zext
; OPT-NEXT: store
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
  %ld = zext i8 %val to i32
  store i32 %ld, i32 addrspace(1)* %ptr
  ret void
}

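; !range metadata has to be rewritten for the widened load: the high 16 bits of
; the loaded i32 are unknown, so only the first lower bound is kept
; (zero-extended to i32) and the range becomes the wrapping interval [lo, 0).
; A range whose lower bound is 0 carries no information and is dropped.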
; OPT-LABEL: @constant_load_i16_align4_range(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_max(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_complex_range(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !1
define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !2
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; OPT: load i32, i32 addrspace(4)* %1, align 4{{$}}
define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

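; !invariant.load metadata is preserved on the widened load.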
; OPT-LABEL: @constant_load_i16_align4_invariant
; OPT: load i32, i32 addrspace(4)* %1, align 4, !invariant.load !3
define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind }

; OPT: !0 = !{i32 5, i32 0}
; OPT: !1 = !{i32 8, i32 0}
; OPT: !2 = !{i32 65520, i32 0}
; OPT: !3 = !{}

!0 = !{i16 5, i16 500}
!1 = !{i16 5, i16 -1}
!2 = !{i16 8, i16 12, i16 42, i16 99}
!3 = !{i16 0, i16 255}
!4 = !{i16 -16, i16 16}
!5 = !{i16 -16, i16 0}
!6 = !{}