; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Code generation test for the llvm.amdgcn.image.{load,load.mip,store,
; store.mip,getresinfo}.<dim> intrinsics. The file is compiled three times
; (verde, fiji, gfx900). Checks using the common prefix apply to every run;
; the verde-only and fiji-only prefixes are used where the expected output
; differs between targets.
;
; Every function uses the amdgpu_ps calling convention with the resource
; descriptor passed inreg, which is why the check lines can name fixed
; registers (s[0:7] for the descriptor, v0.. for data and coordinates).

;===------------------------------------------------------------------------===
; image.load: one test per dimension, full dmask (15 -> dmask:0xf).
; The cube and array variants are expected to carry the "da" modifier.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2d:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_3d:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_cube:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1darray:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darray:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2dmsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darraymsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;===------------------------------------------------------------------------===
; image.load.mip: same as above with an extra trailing %mip coordinate.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}load_mip_1d:
; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2d:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_3d:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_cube:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_1darray:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2darray:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;===------------------------------------------------------------------------===
; image.store: %vdata occupies v[0:3], so the coordinates start at v4.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}store_1d:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2d:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
main_body:
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_3d:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
main_body:
  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_cube:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1darray:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darray:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2dmsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darraymsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;===------------------------------------------------------------------------===
; image.store.mip: stores with an extra trailing %mip coordinate.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}store_mip_1d:
; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2d:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_3d:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_cube:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_1darray:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2darray:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;===------------------------------------------------------------------------===
; image.getresinfo: the only address operand is %mip, so a single VGPR (v0)
; is expected for every dimension.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}getresinfo_1d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_3d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_cube:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_1darray:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darray:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2dmsaa:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;===------------------------------------------------------------------------===
; Partial dmasks: the data operand shrinks to match the number of enabled
; channels (one bit -> single VGPR, two bits -> VGPR pair).
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}load_1d_V1:
; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; GCN-LABEL: {{^}}load_1d_V2:
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; GCN-LABEL: {{^}}store_1d_V1:
; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1d_V2:
; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;===------------------------------------------------------------------------===
; Cache policy: the final i32 operand of the intrinsic is expected to map to
; instruction modifiers as 1 -> glc, 2 -> slc, 3 -> glc slc.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}load_1d_glc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_glc_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}store_1d_glc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; GCN-LABEL: {{^}}store_1d_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; GCN-LABEL: {{^}}store_1d_glc_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

;===------------------------------------------------------------------------===
; Special cases.
;===------------------------------------------------------------------------===

; A getresinfo call with dmask 0 must not produce any image instruction.
;
; GCN-LABEL: {{^}}getresinfo_dmask0:
; GCN-NOT: image
; GCN: ; return to shader part epilog
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}

; Store, then load into the registers that held the stored data, then store
; the loaded data. Ideally, the register allocator would avoid the wait here.
;
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
  ret void
}

; The two LDS stores on either side of the image load are expected to be
; merged into a single ds_write2_b32.
;
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for the fiji run.
;
; NOTE(review): the image load below passes dmask 15 while returning a single
; float; the other narrow tests in this file use a dmask whose set-bit count
; matches the return width (8 -> float, 9 -> <2 x float>). Confirm that this
; mismatch is intended.
;
; VI-LABEL: {{^}}image_load_mmo:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
  store float 0.000000e+00, float addrspace(3)* %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ret float %tex
}

;===------------------------------------------------------------------------===
; Intrinsic declarations. Loads use attribute group #1, stores #0 and
; getresinfo #2 (see the attribute definitions at the end of the file).
;===------------------------------------------------------------------------===

declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2

declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }