; Code generation test for the llvm.amdgcn.raw.buffer.store.* intrinsics (plus
; one use of llvm.amdgcn.raw.buffer.load.v4f32) in amdgpu_ps functions.
;
; Two llc invocations are made. The tonga output is verified with the default
; prefix; the verde output is verified only with the VERDE prefix.
;
; -mtriple is used instead of -march so that the whole target triple is pinned;
; -march would only replace the architecture component of the host's default
; triple and leave the vendor/OS parts host-dependent.
;
; Operand order used by every store call below, as exercised by the checks:
;   (data, resource descriptor, offset, i32 0, trailing i32 flag operand)
; A trailing operand of 1 is expected to print "glc", 2 "slc" and 3 "glc slc".
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Three <4 x float> stores at offset 0 whose trailing operand is 0, 1 and 2:
; expected to print no modifier, glc and slc respectively, with no s_waitcnt
; before the first store.
;CHECK-LABEL: {{^}}buffer_store:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 2)
  ret void
}

; A constant offset operand (42) is expected to be printed as "offset:42".
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0)
  ret void
}

; A non-constant offset operand is expected to be passed in a VGPR (v4) with
; the "offen" modifier.
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
  ret void
}

; Store, then load, then store the loaded value. The checks expect the load to
; reuse v[0:3], a wait on vmcnt(0) before the final store, and (verde only) a
; wait on expcnt(0) between the first store and the load.
;
; Ideally, the register allocator would avoid the wait here
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
;VERDE: s_waitcnt expcnt(0)
;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i32 0)
  ret void
}

; A single float store is expected to select buffer_store_dword.
;CHECK-LABEL: {{^}}buffer_store_x1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %offset) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; A <2 x float> store is expected to select buffer_store_dwordx2.
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Six float stores at %a+4, +8, +12, +16 and %a+28, +32. The checks expect them
; to be combined into one dwordx4 store at offset:4 and one dwordx2 store at
; offset:28, both addressed through v0.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_and:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}

; Same store pattern as the previous test, but the base offset is %inp << 6.
; NOTE(review): presumably the known-zero low bits let the adds be rewritten as
; ors (hence the name) - confirm. The checks only require that the same merged
; dwordx4/dwordx2 pair is still produced, with any single-digit address VGPR.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_or:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
define amdgpu_ps void @buffer_store_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a = shl i32 %inp, 6
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}


; Same six offsets, but the trailing operand differs per pair (0,0 / 1,1 / 3,3).
; The checks expect three separate dwordx2 stores - offset:4 with no modifier,
; offset:12 with glc, offset:28 with glc slc - i.e. stores are only combined
; with a neighbour that carries the same trailing operand.
;CHECK-LABEL: {{^}}buffer_store_x1_offen_merged_glc_slc:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 %a3, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 %a4, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 %a5, i32 0, i32 3)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 %a6, i32 0, i32 3)
  ret void
}

; Two <2 x float> stores at %a+4 and %a+12 are expected to be combined into a
; single dwordx4 store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_and:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; As above, with the base offset computed as %inp << 4.
;CHECK-LABEL: {{^}}buffer_store_x2_offen_merged_or:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
define amdgpu_ps void @buffer_store_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
  %a = shl i32 %inp, 4
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; Six float stores at constant offsets 4..16 and 28..32: expected to become
; one dwordx4 store at offset:4 and one dwordx2 store at offset:28, with no
; address VGPR ("off").
;CHECK-LABEL: {{^}}buffer_store_x1_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
define amdgpu_ps void @buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
  ret void
}

; Two <2 x float> stores at constant offsets 4 and 12: expected to become one
; dwordx4 store at offset:4.
;CHECK-LABEL: {{^}}buffer_store_x2_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x float> %v1, <2 x float> %v2) {
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %v2, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
  ret void
}

; Integer-typed variants: <4 x i32>, <2 x i32> and i32 stores are expected to
; select dwordx4, dwordx2 (glc) and dword (slc) respectively.
;CHECK-LABEL: {{^}}buffer_store_int:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
;CHECK: buffer_store_dword v6, off, s[0:3], 0 slc
define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
  ret void
}

; i8 store of a float converted to an integer: the whole function body is
; expected to be exactly a conversion, a buffer_store_byte and s_endpgm.
;CHECK-LABEL: {{^}}raw_buffer_store_byte:
;CHECK-NEXT: %bb.
;CHECK-NEXT: v_cvt_u32_f32_e32 v{{[0-9]}}, v{{[0-9]}}
;CHECK-NEXT: buffer_store_byte v{{[0-9]}}, off, s[0:3], 0
;CHECK-NEXT: s_endpgm
define amdgpu_ps void @raw_buffer_store_byte(<4 x i32> inreg %rsrc, float %v1) {
main_body:
  %v2 = fptoui float %v1 to i32
  %v3 = trunc i32 %v2 to i8
  call void @llvm.amdgcn.raw.buffer.store.i8(i8 %v3, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; i16 store of a float converted to an integer: expected to be exactly a
; conversion, a buffer_store_short and s_endpgm.
;CHECK-LABEL: {{^}}raw_buffer_store_short:
;CHECK-NEXT: %bb.
;CHECK-NEXT: v_cvt_u32_f32_e32 v{{[0-9]}}, v{{[0-9]}}
;CHECK-NEXT: buffer_store_short v{{[0-9]}}, off, s[0:3], 0
;CHECK-NEXT: s_endpgm
define amdgpu_ps void @raw_buffer_store_short(<4 x i32> inreg %rsrc, float %v1) {
main_body:
  %v2 = fptoui float %v1 to i32
  %v3 = trunc i32 %v2 to i16
  call void @llvm.amdgcn.raw.buffer.store.i16(i16 %v3, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; half store of the low 16 bits of an i32 argument: the store is expected to
; use v0 directly, immediately after the block label, with nothing touching v0
; in between.
;CHECK-LABEL: {{^}}raw_buffer_store_f16:
;CHECK-NEXT: %bb.
;CHECK-NOT: v0
;CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
;CHECK-NEXT: s_endpgm
define amdgpu_ps void @raw_buffer_store_f16(<4 x i32> inreg %rsrc, i32 %v1) {
main_body:
  %trunc = trunc i32 %v1 to i16
  %cast = bitcast i16 %trunc to half
  call void @llvm.amdgcn.raw.buffer.store.f16(half %cast, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; A <2 x half> store is expected to select buffer_store_dword.
;CHECK-LABEL: {{^}}buffer_store_v2f16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %offset) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; A <4 x half> store is expected to select buffer_store_dwordx2.
;CHECK-LABEL: {{^}}buffer_store_v4f16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; i16 store of the low 16 bits of an i32 argument: same expectations as the
; f16 variant (store straight from v0, nothing touching v0 beforehand).
;CHECK-LABEL: {{^}}raw_buffer_store_i16:
;CHECK-NEXT: %bb.
;CHECK-NOT: v0
;CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
;CHECK-NEXT: s_endpgm
define amdgpu_ps void @raw_buffer_store_i16(<4 x i32> inreg %rsrc, i32 %v1) {
main_body:
  %trunc = trunc i32 %v1 to i16
  call void @llvm.amdgcn.raw.buffer.store.i16(i16 %trunc, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; A <2 x i16> store is expected to select buffer_store_dword.
;CHECK-LABEL: {{^}}buffer_store_v2i16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_v2i16(<4 x i32> inreg %rsrc, <2 x i16> %data, i32 %offset) {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; A <4 x i16> store is expected to select buffer_store_dwordx2.
;CHECK-LABEL: {{^}}buffer_store_v4i16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
define amdgpu_ps void @buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
main_body:
  call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Same body and expectations as buffer_store_x1_offset_merged, under a
; different function name.
;CHECK-LABEL: {{^}}raw_buffer_store_x1_offset_merged:
;CHECK-NOT: s_waitcnt
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
define amdgpu_ps void @raw_buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
  ret void
}

; Same six constant offsets, but every call passes 8 as the trailing operand.
; The checks expect six individual dword stores, i.e. no combining.
; NOTE(review): per the function name, 8 presumably marks the access as
; swizzled - confirm against the intrinsic documentation.
;CHECK-LABEL: {{^}}raw_buffer_store_x1_offset_swizzled_not_merged:
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:4
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:8
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:12
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:16
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:28
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:32
define amdgpu_ps void @raw_buffer_store_x1_offset_swizzled_not_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 8)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 8)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 8)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 8)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 8)
  call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 8)
  ret void
}

; Intrinsic declarations used by the tests above.
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }