; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces -instsimplify %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

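; The ptrmask cannot be folded into the source address space in this first group:
; local, private, and region pointers are only 32 bits wide (p3/p5/p2 in the
; datalayout), and an arbitrary 64-bit mask is not known to preserve the high bits
; of the flat address.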
define i8 @ptrmask_cast_local_to_flat(i8 addrspace(3)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(3)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_private_to_flat(i8 addrspace(5)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_private_to_flat(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(5)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(5)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_region_to_flat(i8 addrspace(2)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_region_to_flat(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(2)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(2)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

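; Casts from global and addrspace(999) are folded: these pointers are 64-bit like
; flat pointers, so the ptrmask is rewritten to operate directly in the source
; address space.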
define i8 @ptrmask_cast_global_to_flat(i8 addrspace(1)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_global_to_flat(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[SRC_PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(1)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_999_to_flat(i8 addrspace(999)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_999_to_flat(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(999)* @llvm.ptrmask.p999i8.i64(i8 addrspace(999)* [[SRC_PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(999)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(999)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

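; The ptrmask operand is already a flat pointer here, so there is no source
; address space to infer and the mask is left on the flat pointer.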
define i8 @ptrmask_cast_flat_to_local(i8* %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_flat_to_local(
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8* [[MASKED]] to i8 addrspace(3)*
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[CAST]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask)
  %cast = addrspacecast i8* %masked to i8 addrspace(3)*
  %load = load i8, i8 addrspace(3)* %cast
  ret i8 %load
}

define i8 @ptrmask_cast_flat_to_private(i8* %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_flat_to_private(
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8* [[MASKED]] to i8 addrspace(5)*
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[CAST]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask)
  %cast = addrspacecast i8* %masked to i8 addrspace(5)*
  %load = load i8, i8 addrspace(5)* %cast
  ret i8 %load
}

define i8 @ptrmask_cast_flat_to_global(i8* %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_flat_to_global(
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8* [[MASKED]] to i8 addrspace(1)*
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(1)* [[CAST]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask)
  %cast = addrspacecast i8* %masked to i8 addrspace(1)*
  %load = load i8, i8 addrspace(1)* %cast
  ret i8 %load
}

@lds0 = internal addrspace(3) global i8 123, align 4
@gv = internal addrspace(1) global i8 123, align 4

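; The same folds apply when the flat pointer is an addrspacecast constant
; expression of a global variable.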
define i8 @ptrmask_cast_local_to_flat_global(i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_global(
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* addrspacecast (i8 addrspace(3)* @lds0 to i8*), i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* addrspacecast (i8 addrspace(3)* @lds0 to i8*), i64 %mask)
  %load = load i8, i8* %masked, align 1
  ret i8 %load
}

define i8 @ptrmask_cast_global_to_flat_global(i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_global_to_flat_global(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* @gv, i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* addrspacecast (i8 addrspace(1)* @gv to i8*), i64 %mask)
  %load = load i8, i8* %masked, align 1
  ret i8 %load
}

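; Multiple uses of the cast: with a global source, both the plain load and the
; ptrmask are rewritten to addrspace(1).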
define i8 @multi_ptrmask_cast_global_to_flat(i8 addrspace(1)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @multi_ptrmask_cast_global_to_flat(
; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8 addrspace(1)* [[SRC_PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[SRC_PTR]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT:    ret i8 [[ADD]]
;
  %cast = addrspacecast i8 addrspace(1)* %src.ptr to i8*
  %load0 = load i8, i8* %cast
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load1 = load i8, i8* %masked
  %add = add i8 %load0, %load1
  ret i8 %add
}

; Cannot rewrite the ptrmask itself, but the other uses of the cast can still be rewritten.
define i8 @multi_ptrmask_cast_local_to_flat(i8 addrspace(3)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @multi_ptrmask_cast_local_to_flat(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(3)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8 addrspace(3)* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT:    ret i8 [[ADD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %load0 = load i8, i8* %cast
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load1 = load i8, i8* %masked
  %add = add i8 %load0, %load1
  ret i8 %add
}

define i8 @multi_ptrmask_cast_region_to_flat(i8 addrspace(2)* %src.ptr, i64 %mask) {
; CHECK-LABEL: @multi_ptrmask_cast_region_to_flat(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(2)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8 addrspace(2)* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT:    ret i8 [[ADD]]
;
  %cast = addrspacecast i8 addrspace(2)* %src.ptr to i8*
  %load0 = load i8, i8* %cast
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
  %load1 = load i8, i8* %masked
  %add = add i8 %load0, %load1
  ret i8 %add
}

; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(3)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 -4294967297)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -4294967297)
  %load = load i8, i8* %masked
  ret i8 %load
}

; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(3)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 9223372036854775807)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 9223372036854775807)
  %load = load i8, i8* %masked
  ret i8 %load
}

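; These constant masks keep all of the high 32 bits set, so they can be truncated
; to 32 bits and applied directly to the 32-bit local pointer.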
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 0)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -4294967296)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff80000000(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffff80000000(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -2147483648)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -2147483648)
  %load = load i8, i8* %masked
  ret i8 %load
}

; Test some align-down patterns. These masks only clear low bits and keep the high
; 32 bits set, so they can be truncated and applied to the local pointer.
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffff0000(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffff0000(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -65536)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -65536)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffff00(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffff00(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -256)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -256)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffe0(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffe0(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -32)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -32)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffff0(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffffffffff0(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -16)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -16)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffff8(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffffffffff8(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -8)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -8)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffc(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffc(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -4)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -4)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -2)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -2)
  %load = load i8, i8* %masked
  ret i8 %load
}

define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(
; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 -1)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP1]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 -1)
  %load = load i8, i8* %masked
  ret i8 %load
}

; Make sure a non-constant mask can also be handled when its !range metadata
; proves that the high 32 bits are all set.
define i8 @ptrmask_cast_local_to_flat_load_range_mask(i8 addrspace(3)* %src.ptr, i64 addrspace(1)* %mask.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask(
; CHECK-NEXT:    [[LOAD_MASK:%.*]] = load i64, i64 addrspace(1)* [[MASK_PTR:%.*]], align 8, !range !0
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* [[SRC_PTR:%.*]], i32 [[TMP1]])
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(3)* [[TMP2]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %load.mask = load i64, i64 addrspace(1)* %mask.ptr, align 8, !range !0
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %load.mask)
  %load = load i8, i8* %masked
  ret i8 %load
}

; This should not be folded, as the mask is implicitly zero extended,
; so it would clear the high bits.
define i8 @ptrmask_cast_local_to_flat_const_mask_32bit_neg4(i8 addrspace(3)* %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_32bit_neg4(
; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(3)* [[SRC_PTR:%.*]] to i8*
; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i32(i8* [[CAST]], i32 -4)
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
  %cast = addrspacecast i8 addrspace(3)* %src.ptr to i8*
  %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %cast, i32 -4)
  %load = load i8, i8* %masked
  ret i8 %load
}

declare i8* @llvm.ptrmask.p0i8.i64(i8*, i64) #0
declare i8* @llvm.ptrmask.p0i8.i32(i8*, i32) #0
declare i8 addrspace(5)* @llvm.ptrmask.p5i8.i32(i8 addrspace(5)*, i32) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0

attributes #0 = { nounwind readnone speculatable willreturn }

!0 = !{i64 -64, i64 -1}