; Codegen test for loads of i1 and <N x i1> values through addrspace(1)
; pointers, both stored back unchanged and zero-/sign-extended to 32- and
; 64-bit integers before the store.
;
; The file is compiled three times (see the run lines below):
;   * amdgcn with the default subtarget          -> prefixes GCN and FUNC
;   * amdgcn with -mcpu=tonga and flat-for-global
;     disabled                                   -> prefixes GCN and FUNC
;   * r600 with -mcpu=cypress                    -> prefixes EG and FUNC
; The two amdgcn invocations also pass -verify-machineinstrs; the r600 one
; does not.
;
; Every kernel takes (%out, %in), loads once from %in, optionally extends the
; value, and stores once to %out.

; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; ---------------------------------------------------------------------------
; Plain load + store of a scalar i1.
; The amdgcn checks expect an unsigned byte load, an AND with the constant 1,
; and a byte store, in that order. The r600 checks expect an 8-bit vertex read
; followed by an integer AND.
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}global_load_i1:
; GCN: buffer_load_ubyte
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
; GCN: buffer_store_byte

; EG: VTX_READ_8
; EG: AND_INT
define amdgpu_kernel void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %load = load i1, i1 addrspace(1)* %in
  store i1 %load, i1 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Plain load + store of <N x i1> for N = 2, 3, 4, 8, 16, 32, 64.
; Only the function label is checked for these kernels; no instruction
; patterns are pinned.
; NOTE(review): presumably these exist only to prove that selection succeeds
; (and, on amdgcn, that the machine verifier is clean) -- confirm before
; adding stricter checks.
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}global_load_v2i1:
define amdgpu_kernel void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  store <2 x i1> %load, <2 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v3i1:
define amdgpu_kernel void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  store <3 x i1> %load, <3 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v4i1:
define amdgpu_kernel void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  store <4 x i1> %load, <4 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v8i1:
define amdgpu_kernel void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  store <8 x i1> %load, <8 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16i1:
define amdgpu_kernel void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  store <16 x i1> %load, <16 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v32i1:
define amdgpu_kernel void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  store <32 x i1> %load, <32 x i1> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v64i1:
define amdgpu_kernel void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  store <64 x i1> %load, <64 x i1> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Scalar i1 extended to i32.
; ---------------------------------------------------------------------------

; Zero-extension: the amdgcn checks expect an unsigned byte load followed by a
; dword store. There are no r600 checks for this kernel beyond the label.
; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
define amdgpu_kernel void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extension: the amdgcn checks expect an unsigned byte load, then a
; v_bfe_i32 whose trailing operands are exactly "0, 1" (the {{$}} anchors the
; end of the line), then a dword store. The r600 checks expect an 8-bit vertex
; read followed by BFE_INT.
; NOTE(review): the "0, 1" operands are presumably bit offset 0 / width 1,
; i.e. a signed extract of bit 0 -- confirm against the ISA documentation.
; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
; GCN: buffer_store_dword

; EG: VTX_READ_8
; EG: BFE_INT
define amdgpu_kernel void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; <N x i1> extended to <N x i32> for N = 1, 2, 3, 4, 8, 16, 32, 64, with a
; zext and a sext kernel for each width. Only the function label is checked.
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = zext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = sext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = zext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = sext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = zext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = sext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = zext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = sext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = zext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = sext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = zext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = sext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = zext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = sext <32 x i1> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = zext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = sext <64 x i1> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Scalar i1 extended to i64.
; ---------------------------------------------------------------------------

; Zero-extension: the first three amdgcn checks are order-independent (DAG
; form). They expect an unsigned byte load captured as LOAD, a move of the
; constant 0 into a VGPR, and an AND of LOAD with 1. A two-dword store must
; follow.
; NOTE(review): the move of 0 is presumably the upper 32 bits of the i64
; result -- confirm.
; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Sign-extension: the amdgcn checks are strictly ordered. They expect an
; unsigned byte load, a v_bfe_i32 ending in "0, 1" captured as BFE, an
; arithmetic right shift of BFE by 31, and a two-dword store.
; The LOAD variable is captured but not referenced by a later check; the
; v_bfe_i32 source operand is matched by a generic VGPR pattern instead.
; NOTE(review): the shift by 31 presumably replicates the sign bit into the
; upper 32 bits of the i64 result -- confirm.
; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
  %a = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; <N x i1> extended to <N x i64> for N = 1, 2, 3, 4, 8, 16, 32, 64, with a
; zext and a sext kernel for each width. Only the function label is checked.
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = zext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
  %ext = sext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = zext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = sext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = zext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
  %ext = sext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = zext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
  %ext = sext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = zext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
  %ext = sext <8 x i1> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = zext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
  %ext = sext <16 x i1> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = zext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
  %ext = sext <32 x i1> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = zext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
  %ext = sext <64 x i1> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}

; Attribute group shared by every kernel in this file.
attributes #0 = { nounwind }