1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5; Legacy intrinsics that just read implicit parameters
6
7; FUNC-LABEL: {{^}}ngroups_x:
8; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
9; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
10; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
11; GCN-NOHSA: buffer_store_dword [[VVAL]]
12
13; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
14; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
; Kernel under test for the legacy @llvm.r600.read.ngroups.x intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the X-dimension work-group count, going
; by the intrinsic name only -- confirm against the intrinsic definition.
15define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) {
16entry:
17  %0 = call i32 @llvm.r600.read.ngroups.x() #0
18  store i32 %0, i32 addrspace(1)* %out
19  ret void
20}
21
22; FUNC-LABEL: {{^}}ngroups_y:
23; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
24; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
25; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
26; GCN-NOHSA: buffer_store_dword [[VVAL]]
27
28; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
29; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
; Kernel under test for the legacy @llvm.r600.read.ngroups.y intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Y-dimension work-group count, going
; by the intrinsic name only -- confirm against the intrinsic definition.
30define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) {
31entry:
32  %0 = call i32 @llvm.r600.read.ngroups.y() #0
33  store i32 %0, i32 addrspace(1)* %out
34  ret void
35}
36
37; FUNC-LABEL: {{^}}ngroups_z:
38; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
39; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
40; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
41; GCN-NOHSA: buffer_store_dword [[VVAL]]
42
43; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
44; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
; Kernel under test for the legacy @llvm.r600.read.ngroups.z intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Z-dimension work-group count, going
; by the intrinsic name only -- confirm against the intrinsic definition.
45define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) {
46entry:
47  %0 = call i32 @llvm.r600.read.ngroups.z() #0
48  store i32 %0, i32 addrspace(1)* %out
49  ret void
50}
51
52; FUNC-LABEL: {{^}}global_size_x:
53; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
54; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
55; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
56; GCN-NOHSA: buffer_store_dword [[VVAL]]
57
58; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
59; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
; Kernel under test for the legacy @llvm.r600.read.global.size.x intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the X-dimension global size, going by
; the intrinsic name only -- confirm against the intrinsic definition.
60define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) {
61entry:
62  %0 = call i32 @llvm.r600.read.global.size.x() #0
63  store i32 %0, i32 addrspace(1)* %out
64  ret void
65}
66
67; FUNC-LABEL: {{^}}global_size_y:
68; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
69; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
70; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
71; GCN-NOHSA: buffer_store_dword [[VVAL]]
72
73; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
74; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
; Kernel under test for the legacy @llvm.r600.read.global.size.y intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Y-dimension global size, going by
; the intrinsic name only -- confirm against the intrinsic definition.
75define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) {
76entry:
77  %0 = call i32 @llvm.r600.read.global.size.y() #0
78  store i32 %0, i32 addrspace(1)* %out
79  ret void
80}
81
82; FUNC-LABEL: {{^}}global_size_z:
83; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
84; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
85; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
86; GCN-NOHSA: buffer_store_dword [[VVAL]]
87
88; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
89; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
; Kernel under test for the legacy @llvm.r600.read.global.size.z intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Z-dimension global size, going by
; the intrinsic name only -- confirm against the intrinsic definition.
90define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) {
91entry:
92  %0 = call i32 @llvm.r600.read.global.size.z() #0
93  store i32 %0, i32 addrspace(1)* %out
94  ret void
95}
96
97; FUNC-LABEL: {{^}}local_size_x:
98; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
99; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
100; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
101; GCN-NOHSA: buffer_store_dword [[VVAL]]
102
103; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
104; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
; Kernel under test for the legacy @llvm.r600.read.local.size.x intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the X-dimension work-group (local)
; size, going by the intrinsic name only -- confirm against its definition.
105define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) {
106entry:
107  %0 = call i32 @llvm.r600.read.local.size.x() #0
108  store i32 %0, i32 addrspace(1)* %out
109  ret void
110}
111
112; FUNC-LABEL: {{^}}local_size_y:
113; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
114; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
115; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
116; GCN-NOHSA: buffer_store_dword [[VVAL]]
117
118; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
119; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
; Kernel under test for the legacy @llvm.r600.read.local.size.y intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Y-dimension work-group (local)
; size, going by the intrinsic name only -- confirm against its definition.
120define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) {
121entry:
122  %0 = call i32 @llvm.r600.read.local.size.y() #0
123  store i32 %0, i32 addrspace(1)* %out
124  ret void
125}
126
127; FUNC-LABEL: {{^}}local_size_z:
128; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
129; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
130; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
131; GCN-NOHSA: buffer_store_dword [[VVAL]]
132
133; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
134; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
; Kernel under test for the legacy @llvm.r600.read.local.size.z intrinsic.
; It calls the intrinsic (no arguments, i32 result) and stores the result
; through the i32 addrspace(1)* argument %out, then returns.
; The call site carries attribute group #0, which this file defines as readnone.
; NOTE(review): presumably the value is the Z-dimension work-group (local)
; size, going by the intrinsic name only -- confirm against its definition.
135define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) {
136entry:
137  %0 = call i32 @llvm.r600.read.local.size.z() #0
138  store i32 %0, i32 addrspace(1)* %out
139  ret void
140}
141
; Declarations of the nine legacy llvm.r600.read.* intrinsics called by the
; kernels in this file. Each takes no arguments and returns i32, and each is
; tagged with attribute group #0.
; ngroups.{x,y,z}
142declare i32 @llvm.r600.read.ngroups.x() #0
143declare i32 @llvm.r600.read.ngroups.y() #0
144declare i32 @llvm.r600.read.ngroups.z() #0
145
; global.size.{x,y,z}
146declare i32 @llvm.r600.read.global.size.x() #0
147declare i32 @llvm.r600.read.global.size.y() #0
148declare i32 @llvm.r600.read.global.size.z() #0
149
; local.size.{x,y,z}
150declare i32 @llvm.r600.read.local.size.x() #0
151declare i32 @llvm.r600.read.local.size.y() #0
152declare i32 @llvm.r600.read.local.size.z() #0
153
; Attribute group shared by the declarations above and by every call site in
; this file; it applies readnone to them.
154attributes #0 = { readnone }
155