; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

; Testing for ds_read/write_b128
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s

; Every kernel below loads an i8 or <N x i8> value through the addrspace(3)
; pointer %in and stores it, optionally zero- or sign-extended, through the
; addrspace(3) pointer %out. The check lines in front of each kernel pin the
; instructions expected from each of the llc invocations above.

; ----------------------------------------------------------------------------
; Plain loads, stored back without extension.
; ----------------------------------------------------------------------------

; FUNC-LABEL: {{^}}local_load_i8:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(3)* %in
  store i8 %ld, i8 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v2i8:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v3i8:
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v4i8:
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v8i8:
; GFX9-NOT: m0
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
  ret void
}

; The LO and HI captures tie the registers written back by ds_write2_b64 to
; the register range produced by ds_read2_b64.
; FUNC-LABEL: {{^}}local_load_v16i8:
; GFX9-NOT: m0
; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
  ret void
}

; ----------------------------------------------------------------------------
; Extending loads from i8 to i32.
; ----------------------------------------------------------------------------

; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
; GFX9-NOT: m0
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_i8

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %ld = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
; GFX9-NOT: m0
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_u16
; FIXME: Need to optimize this sequence to avoid extra shift on VI.
;         t23: i16 = srl t39, Constant:i32<8>
;         t31: i32 = any_extend t23
;         t33: i32 = sign_extend_inreg t31, ValueType:ch:i8

; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8

; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8

; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
; GFX9-NOT: m0
; GCN: ds_read_b32

; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}}
; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32

; GCN-DAG: ds_write_b64
; GCN-DAG: ds_write_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}

; ----------------------------------------------------------------------------
; Extending loads from i8 to i64.
; ----------------------------------------------------------------------------

; The 64-bit store must combine the loaded byte (LO) with a zero high half (HI).
; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; EG: 0.0
define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; The high half (HI) is expected to come from an arithmetic shift right by 31
; of the sign-extended low half (LO).
; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_i8 v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; TODO: merge?
; EG: 0.0
define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

603; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64: 604; SICIVI: s_mov_b32 m0 605; GFX9-NOT: m0 606 607; EG: LDS_READ_RET 608; EG: LDS_READ_RET 609; EG: LDS_READ_RET 610; EG: LDS_READ_RET 611define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 612 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 613 %ext = zext <16 x i8> %load to <16 x i64> 614 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 615 ret void 616} 617 618; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64: 619; SICIVI: s_mov_b32 m0 620; GFX9-NOT: m0 621 622; EG: LDS_READ_RET 623; EG: LDS_READ_RET 624; EG: LDS_READ_RET 625; EG: LDS_READ_RET 626define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 627 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 628 %ext = sext <16 x i8> %load to <16 x i64> 629 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 630 ret void 631} 632 633; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64: 634; SICIVI: s_mov_b32 m0 635; GFX9-NOT: m0 636 637; EG: LDS_READ_RET 638; EG: LDS_READ_RET 639; EG: LDS_READ_RET 640; EG: LDS_READ_RET 641; EG: LDS_READ_RET 642; EG: LDS_READ_RET 643; EG: LDS_READ_RET 644; EG: LDS_READ_RET 645define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 646 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 647 %ext = zext <32 x i8> %load to <32 x i64> 648 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 649 ret void 650} 651 652; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64: 653; SICIVI: s_mov_b32 m0 654; GFX9-NOT: m0 655 656; EG: LDS_READ_RET 657; EG: LDS_READ_RET 658; EG: LDS_READ_RET 659; EG: LDS_READ_RET 660; EG: LDS_READ_RET 661; EG: LDS_READ_RET 662; EG: LDS_READ_RET 663; EG: LDS_READ_RET 664define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 665 %load = load 
<32 x i8>, <32 x i8> addrspace(3)* %in 666 %ext = sext <32 x i8> %load to <32 x i64> 667 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 668 ret void 669} 670 671; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64: 672; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 673; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 674; %ext = zext <64 x i8> %load to <64 x i64> 675; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 676; ret void 677; } 678 679; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64: 680; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 681; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 682; %ext = sext <64 x i8> %load to <64 x i64> 683; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 684; ret void 685; } 686 687; FUNC-LABEL: {{^}}local_zextload_i8_to_i16: 688; SICIVI: s_mov_b32 m0 689; GFX9-NOT: m0 690; GCN: ds_read_u8 v[[VAL:[0-9]+]], 691; GCN: ds_write_b16 v[[VAL:[0-9]+]] 692 693; EG: LDS_UBYTE_READ_RET 694; EG: LDS_SHORT_WRITE 695define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 696 %a = load i8, i8 addrspace(3)* %in 697 %ext = zext i8 %a to i16 698 store i16 %ext, i16 addrspace(3)* %out 699 ret void 700} 701 702; FUNC-LABEL: {{^}}local_sextload_i8_to_i16: 703; SICIVI: s_mov_b32 m0 704; GFX9-NOT: m0 705; GCN: ds_read_i8 v[[VAL:[0-9]+]], 706; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]] 707 708; EG: LDS_UBYTE_READ_RET 709; EG: BFE_INT 710; EG: LDS_SHORT_WRITE 711define amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 712 %a = load i8, i8 addrspace(3)* %in 713 %ext = sext i8 %a to i16 714 store i16 %ext, i16 addrspace(3)* %out 715 ret void 716} 717 718; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16: 719; SICIVI: s_mov_b32 m0 720; GFX9-NOT: m0 721 722; EG: LDS_UBYTE_READ_RET 723; EG: 
LDS_SHORT_WRITE 724define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 725 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 726 %ext = zext <1 x i8> %load to <1 x i16> 727 store <1 x i16> %ext, <1 x i16> addrspace(3)* %out 728 ret void 729} 730 731; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16: 732; SICIVI: s_mov_b32 m0 733; GFX9-NOT: m0 734 735; EG: LDS_UBYTE_READ_RET 736; EG: BFE_INT 737; EG: LDS_SHORT_WRITE 738define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 739 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 740 %ext = sext <1 x i8> %load to <1 x i16> 741 store <1 x i16> %ext, <1 x i16> addrspace(3)* %out 742 ret void 743} 744 745; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16: 746; SICIVI: s_mov_b32 m0 747; GFX9-NOT: m0 748 749; EG: LDS_USHORT_READ_RET 750; EG: LDS_WRITE 751define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 752 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 753 %ext = zext <2 x i8> %load to <2 x i16> 754 store <2 x i16> %ext, <2 x i16> addrspace(3)* %out 755 ret void 756} 757 758; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16: 759; SICIVI: s_mov_b32 m0 760; GFX9-NOT: m0 761 762; EG: LDS_USHORT_READ_RET 763; EG: BFE_INT 764; EG: BFE_INT 765; EG: LDS_WRITE 766define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 767 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 768 %ext = sext <2 x i8> %load to <2 x i16> 769 store <2 x i16> %ext, <2 x i16> addrspace(3)* %out 770 ret void 771} 772 773; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16: 774; SICIVI: s_mov_b32 m0 775; GFX9-NOT: m0 776 777; EG: LDS_READ_RET 778; EG: LDS_WRITE 779; EG: LDS_WRITE 780define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 781 %load = 
load <4 x i8>, <4 x i8> addrspace(3)* %in 782 %ext = zext <4 x i8> %load to <4 x i16> 783 store <4 x i16> %ext, <4 x i16> addrspace(3)* %out 784 ret void 785} 786 787; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16: 788; SICIVI: s_mov_b32 m0 789; GFX9-NOT: m0 790 791; EG: LDS_READ_RET 792; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR 793; EG-DAG: BFE_INT 794; EG-DAG: BFE_INT 795; EG-DAG: BFE_INT 796; EG-DAG: BFE_INT 797; EG: LDS_WRITE 798; EG: LDS_WRITE 799define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 800 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 801 %ext = sext <4 x i8> %load to <4 x i16> 802 store <4 x i16> %ext, <4 x i16> addrspace(3)* %out 803 ret void 804} 805 806; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16: 807; SICIVI: s_mov_b32 m0 808; GFX9-NOT: m0 809 810; EG: LDS_READ_RET 811; EG: LDS_READ_RET 812; EG: LDS_WRITE 813; EG: LDS_WRITE 814; EG: LDS_WRITE 815; EG: LDS_WRITE 816define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 817 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 818 %ext = zext <8 x i8> %load to <8 x i16> 819 store <8 x i16> %ext, <8 x i16> addrspace(3)* %out 820 ret void 821} 822 823; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16: 824; SICIVI: s_mov_b32 m0 825; GFX9-NOT: m0 826 827; EG: LDS_READ_RET 828; EG: LDS_READ_RET 829; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR 830; EG-DAG: BFE_INT 831; EG-DAG: BFE_INT 832; EG-DAG: BFE_INT 833; EG-DAG: BFE_INT 834; EG-DAG: BFE_INT 835; EG-DAG: BFE_INT 836; EG-DAG: BFE_INT 837; EG-DAG: BFE_INT 838; EG: LDS_WRITE 839; EG: LDS_WRITE 840; EG: LDS_WRITE 841; EG: LDS_WRITE 842define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 843 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 844 %ext = sext <8 x i8> %load to <8 x i16> 845 store <8 x i16> %ext, <8 x i16> 
addrspace(3)* %out 846 ret void 847} 848 849; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16: 850; SICIVI: s_mov_b32 m0 851; GFX9-NOT: m0 852 853; EG: LDS_READ_RET 854; EG: LDS_READ_RET 855; EG: LDS_READ_RET 856; EG: LDS_READ_RET 857; EG: LDS_WRITE 858; EG: LDS_WRITE 859; EG: LDS_WRITE 860; EG: LDS_WRITE 861; EG: LDS_WRITE 862; EG: LDS_WRITE 863; EG: LDS_WRITE 864; EG: LDS_WRITE 865define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 866 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 867 %ext = zext <16 x i8> %load to <16 x i16> 868 store <16 x i16> %ext, <16 x i16> addrspace(3)* %out 869 ret void 870} 871 872; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16: 873; SICIVI: s_mov_b32 m0 874; GFX9-NOT: m0 875 876; EG: LDS_READ_RET 877; EG: LDS_READ_RET 878; EG: LDS_READ_RET 879; EG: LDS_READ_RET 880; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR 881; EG-DAG: BFE_INT 882; EG-DAG: BFE_INT 883; EG-DAG: BFE_INT 884; EG-DAG: BFE_INT 885; EG-DAG: BFE_INT 886; EG-DAG: BFE_INT 887; EG-DAG: BFE_INT 888; EG-DAG: BFE_INT 889; EG-DAG: BFE_INT 890; EG-DAG: BFE_INT 891; EG-DAG: BFE_INT 892; EG-DAG: BFE_INT 893; EG-DAG: BFE_INT 894; EG-DAG: BFE_INT 895; EG-DAG: BFE_INT 896; EG-DAG: BFE_INT 897; EG: LDS_WRITE 898; EG: LDS_WRITE 899; EG: LDS_WRITE 900; EG: LDS_WRITE 901; EG: LDS_WRITE 902; EG: LDS_WRITE 903; EG: LDS_WRITE 904; EG: LDS_WRITE 905define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 906 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 907 %ext = sext <16 x i8> %load to <16 x i16> 908 store <16 x i16> %ext, <16 x i16> addrspace(3)* %out 909 ret void 910} 911 912; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16: 913; SICIVI: s_mov_b32 m0 914; GFX9-NOT: m0 915 916; EG: LDS_READ_RET 917; EG: LDS_READ_RET 918; EG: LDS_READ_RET 919; EG: LDS_READ_RET 920; EG: LDS_READ_RET 921; EG: LDS_READ_RET 922; EG: LDS_READ_RET 
923; EG: LDS_READ_RET 924; EG: LDS_WRITE 925; EG: LDS_WRITE 926; EG: LDS_WRITE 927; EG: LDS_WRITE 928; EG: LDS_WRITE 929; EG: LDS_WRITE 930; EG: LDS_WRITE 931; EG: LDS_WRITE 932; EG: LDS_WRITE 933; EG: LDS_WRITE 934; EG: LDS_WRITE 935; EG: LDS_WRITE 936; EG: LDS_WRITE 937; EG: LDS_WRITE 938; EG: LDS_WRITE 939; EG: LDS_WRITE 940define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 941 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 942 %ext = zext <32 x i8> %load to <32 x i16> 943 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out 944 ret void 945} 946 947; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16: 948; SICIVI: s_mov_b32 m0 949; GFX9-NOT: m0 950 951; EG: LDS_READ_RET 952; EG: LDS_READ_RET 953; EG: LDS_READ_RET 954; EG: LDS_READ_RET 955; EG: LDS_READ_RET 956; EG: LDS_READ_RET 957; EG: LDS_READ_RET 958; EG: LDS_READ_RET 959; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR 960; EG-DAG: BFE_INT 961; EG-DAG: BFE_INT 962; EG-DAG: BFE_INT 963; EG-DAG: BFE_INT 964; EG-DAG: BFE_INT 965; EG-DAG: BFE_INT 966; EG-DAG: BFE_INT 967; EG-DAG: BFE_INT 968; EG-DAG: BFE_INT 969; EG-DAG: BFE_INT 970; EG-DAG: BFE_INT 971; EG-DAG: BFE_INT 972; EG-DAG: BFE_INT 973; EG-DAG: BFE_INT 974; EG-DAG: BFE_INT 975; EG-DAG: BFE_INT 976; EG-DAG: BFE_INT 977; EG-DAG: BFE_INT 978; EG-DAG: BFE_INT 979; EG-DAG: BFE_INT 980; EG-DAG: BFE_INT 981; EG-DAG: BFE_INT 982; EG-DAG: BFE_INT 983; EG-DAG: BFE_INT 984; EG-DAG: BFE_INT 985; EG-DAG: BFE_INT 986; EG-DAG: BFE_INT 987; EG-DAG: BFE_INT 988; EG: LDS_WRITE 989; EG: LDS_WRITE 990; EG: LDS_WRITE 991; EG: LDS_WRITE 992; EG: LDS_WRITE 993; EG: LDS_WRITE 994; EG: LDS_WRITE 995; EG: LDS_WRITE 996; EG: LDS_WRITE 997; EG: LDS_WRITE 998; EG: LDS_WRITE 999; EG: LDS_WRITE 1000; EG: LDS_WRITE 1001; EG: LDS_WRITE 1002; EG: LDS_WRITE 1003; EG: LDS_WRITE 1004define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 
1005 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 1006 %ext = sext <32 x i8> %load to <32 x i16> 1007 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out 1008 ret void 1009} 1010 1011; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16: 1012; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 1013; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 1014; %ext = zext <64 x i8> %load to <64 x i16> 1015; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out 1016; ret void 1017; } 1018 1019; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16: 1020; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 1021; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 1022; %ext = sext <64 x i8> %load to <64 x i16> 1023; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out 1024; ret void 1025; } 1026 1027; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 1028; FUNC-LABEL: {{^}}local_v16i8_to_128: 1029 1030; SI-NOT: ds_read_b128 1031; SI-NOT: ds_write_b128 1032 1033; CIVI: ds_read_b128 1034; CIVI: ds_write_b128 1035 1036; EG: LDS_READ_RET 1037; EG: LDS_READ_RET 1038; EG: LDS_READ_RET 1039; EG: LDS_READ_RET 1040define amdgpu_kernel void @local_v16i8_to_128(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) { 1041 %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 16 1042 store <16 x i8> %ld, <16 x i8> addrspace(3)* %out, align 16 1043 ret void 1044} 1045 1046attributes #0 = { nounwind } 1047