; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Legacy intrinsics that just read implicit parameters
;
; Every kernel in this file calls a single intrinsic and stores the i32
; result to %out. For the amdgcn runs the checks expect the value to be
; fetched with s_load_dword from s[0:1] (the expected offset differs between
; the two -mcpu values), copied to a VGPR and written with
; buffer_store_dword. For the r600 run the checks expect a MOV from a KC0
; constant followed by a MEM_RAT_CACHELESS store.

; FUNC-LABEL: {{^}}workdim_legacy:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
define void @workdim_legacy (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.AMDGPU.read.workdim() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Legacy use of r600 intrinsics by GCN

; The tgid values are stored in sgprs offset by the number of user
; sgprs.
;
; The register operand in each v_mov check below is anchored with {{$}} so
; that a single-digit SGPR name cannot match as a prefix of a two-digit one.

; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y_legacy:

; GCN-NOHSA: buffer_store_dword v1
define void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Declarations of every intrinsic called above; all share attribute group #0.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

declare i32 @llvm.AMDGPU.read.workdim() #0

attributes #0 = { readnone }