; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Monotonic atomic i32 load through an addrspace(3) pointer, with no address
; offset. Both targets are expected to emit one ds_read_b32 from v0 with
; nothing after the address operand (the end-of-line anchor rejects a trailing
; offset or other modifier), then wait on lgkmcnt(0) before returning.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
  %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
  ret i32 %load
}

; Monotonic atomic i32 load from an addrspace(3) pointer advanced by 16 i32
; elements. The checks expect the 16 * 4 = 64 byte displacement to appear as
; the instruction's offset:64 field on a single ds_read_b32, rather than as a
; separate address computation before the load.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
  ; 16 elements of i32 past %ptr.
  %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
  %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
  ret i32 %load
}

; Monotonic atomic i64 load through an addrspace(3) pointer, with no address
; offset. Both targets are expected to emit one ds_read_b64 into v[0:1] from
; v0 with nothing after the address operand, then wait on lgkmcnt(0) before
; returning.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
  %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
  ret i64 %load
}

; Monotonic atomic i64 load from an addrspace(3) pointer advanced by 16 i64
; elements. The checks expect the 16 * 8 = 128 byte displacement to appear as
; the instruction's offset:128 field on a single ds_read_b64.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
  ; 16 elements of i64 past %ptr.
  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
  %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
  ret i64 %load
}

; Floating-point variant: monotonic atomic float load from an addrspace(3)
; pointer advanced by 16 float elements. The expected output is the same as
; for the i32 offset case in this file: a single ds_read_b32 with offset:64
; (16 * 4 bytes).
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
  ; 16 elements of float past %ptr.
  %gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
  %load = load atomic float, float addrspace(3)* %gep monotonic, align 4
  ret float %load
}

; Floating-point variant: monotonic atomic double load from an addrspace(3)
; pointer advanced by 16 double elements. The expected output is the same as
; for the i64 offset case in this file: a single ds_read_b64 with offset:128
; (16 * 8 bytes).
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
  ; 16 elements of double past %ptr.
  %gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
  %load = load atomic double, double addrspace(3)* %gep monotonic, align 8
  ret double %load
}

; Pointer-valued variant: monotonic atomic load of an i8* (default address
; space pointer) stored in addrspace(3), 16 elements past the base. The checks
; expect a single 64-bit ds_read_b64 with offset:128, i.e. the loaded pointer
; is treated as an 8-byte value and the displacement is 16 * 8 bytes.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
  ; 16 elements of i8* past %ptr.
  %gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
  %load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
  ret i8* %load
}

; Pointer-valued variant: monotonic atomic load of an addrspace(3) pointer that
; is itself stored in addrspace(3), 16 elements past the base. The checks
; expect a single 32-bit ds_read_b32 with offset:64, i.e. the loaded pointer is
; treated as a 4-byte value and the displacement is 16 * 4 bytes.
; The kaveri checks also require an m0 write on the line right after the first
; s_waitcnt; the gfx900 checks forbid an m0 write ahead of the load.
; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
  ; 16 elements of i8 addrspace(3)* past %ptr.
  %gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
  %load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
  ret i8 addrspace(3)* %load
}
