; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Code generation test for the llvm.r600.read.local.size.{x,y,z} intrinsics.
;
; Three configurations are exercised by the commands above -- default amdgcn,
; amdgcn with -mcpu=tonga, and r600 with -mcpu=redwood. Every kernel stores a
; value derived from one or more of the intrinsics to %out, and the checks pin
; down where that value is read from and how it is combined.
;
; Maintenance note -- earlier revisions of this file carried check lines for
; HSA-only prefixes (the CI/VI HSA variants and a bare HSA prefix). None of the
; commands above enables such a prefix, so FileCheck silently skipped them, and
; one of them referenced a pattern variable (XY) that was never defined in its
; function. They have been dropped so that every remaining check line is live.
; If HSA coverage is wanted, add a dedicated command line together with checks
; that were actually validated against its output.


; Single component, X. The amdgcn runs expect one scalar dword load relative to
; s[0:1] (offset 0x6 in the default run, 0x18 in the tonga run) that is copied
; to a VGPR and stored. The r600 run expects the value to come from KC0[1].Z.
; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[1].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18

; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Single component, Y. Same shape as the X test with offsets 0x7 / 0x1c on the
; amdgcn runs and KC0[1].W on the r600 run.
; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[1].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Single component, Z. Same shape as the X test with offsets 0x8 / 0x20 on the
; amdgcn runs and KC0[2].X on the r600 run.
; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[2].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Product X * Y. Both operands are loaded independently (hence the DAG checks)
; and the multiply is expected to be selected as v_mul_u32_u24.
; FUNC-LABEL: {{^}}local_size_xy:
; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %val = mul i32 %x, %y
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Product X * Z, expected to be selected as v_mul_u32_u24.
; FUNC-LABEL: {{^}}local_size_xz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %x, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Product Y * Z, expected to be selected as v_mul_u32_u24.
; FUNC-LABEL: {{^}}local_size_yz:

; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
entry:
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %y, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; X * Y + Z. The multiply and add are expected to fold into one v_mad_u32_u24.
; FUNC-LABEL: {{^}}local_size_xyz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %xy = mul i32 %x, %y
  %xyz = add i32 %xy, %z
  store i32 %xyz, i32 addrspace(1)* %out
  ret void
}

; Known-bits test for X. The shl/lshr pair by 16 clears the upper 16 bits of
; the intrinsic result. The negative 0xffff check between the load and the
; v_mov, plus the requirement that the store immediately follows the v_mov,
; asserts that no masking instruction is emitted, i.e. the loaded value is
; stored unmodified.
; FUNC-LABEL: {{^}}local_size_x_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.x() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

; Known-bits test for Y. Same structure as the X variant, with offsets
; 0x7 / 0x1c.
; FUNC-LABEL: {{^}}local_size_y_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.y() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

; Known-bits test for Z. Same structure as the X variant, with offsets
; 0x8 / 0x20.
; FUNC-LABEL: {{^}}local_size_z_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.z() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

; Intrinsic declarations shared by every kernel above.
declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

attributes #0 = { nounwind readnone }