1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s 2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s 3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s 4; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s 5 6; Testing for ds_read/write_b128 7; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s 8; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s 9 10; FUNC-LABEL: {{^}}local_load_i16: 11; GFX9-NOT: m0 12; SICIVI: s_mov_b32 m0 13 14; GCN: ds_read_u16 v{{[0-9]+}} 15 16; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 17; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 18; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 19; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 20; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]] 21define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) { 22entry: 23 %ld = load i16, i16 addrspace(3)* %in 24 store i16 %ld, i16 addrspace(3)* %out 25 ret void 26} 27 28; FUNC-LABEL: {{^}}local_load_v2i16: 29; GFX9-NOT: m0 30; SICIVI: s_mov_b32 m0 31 32; GCN: ds_read_b32 33 34; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 35; EG: LDS_READ_RET {{.*}} [[FROM]] 36; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 37; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 38; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 39define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) { 40entry: 41 %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in 42 store <2 x i16> %ld, <2 x i16> addrspace(3)* %out 43 ret void 44} 45 46; FUNC-LABEL: {{^}}local_load_v3i16: 47; GFX9-NOT: m0 48; SICIVI: s_mov_b32 m0 49 50; GCN: ds_read_b64 51; GCN-DAG: ds_write_b32 52; GCN-DAG: ds_write_b16 53 54; EG-DAG: LDS_USHORT_READ_RET 55; EG-DAG: LDS_READ_RET 56define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 57entry: 58 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 59 store <3 x i16> %ld, <3 x i16> addrspace(3)* %out 60 ret void 61} 62 63; FUNC-LABEL: {{^}}local_load_v4i16: 64; GFX9-NOT: m0 65; SICIVI: s_mov_b32 m0 66 67; GCN: ds_read_b64 68 69; EG: LDS_READ_RET 70; EG: LDS_READ_RET 71define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) { 72entry: 73 %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in 74 store <4 x i16> %ld, <4 x i16> addrspace(3)* %out 75 ret void 76} 77 78; FUNC-LABEL: {{^}}local_load_v8i16: 79; GFX9-NOT: m0 80; SICIVI: s_mov_b32 m0 81 82; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 83 84; EG: LDS_READ_RET 85; EG: LDS_READ_RET 86; EG: LDS_READ_RET 87; EG: LDS_READ_RET 88define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { 89entry: 90 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in 91 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out 92 ret void 93} 94 95; FUNC-LABEL: {{^}}local_load_v16i16: 96; GFX9-NOT: m0 97; SICIVI: s_mov_b32 m0 98 99; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}} 100; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} 101 102 103; EG: LDS_READ_RET 104; EG: LDS_READ_RET 105; EG: LDS_READ_RET 106; EG: LDS_READ_RET 107 108; EG: LDS_READ_RET 109; EG: LDS_READ_RET 110; EG: LDS_READ_RET 111; EG: LDS_READ_RET 112define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) { 113entry: 114 %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in 115 store <16 x i16> %ld, <16 x i16> addrspace(3)* %out 116 ret void 117} 118 119; FUNC-LABEL: {{^}}local_zextload_i16_to_i32: 120; GFX9-NOT: m0 121; SICIVI: s_mov_b32 m0 122 123; GCN: ds_read_u16 124; GCN: ds_write_b32 125 126; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 127; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 128; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 129; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 130; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 131define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 132 %a = load i16, i16 addrspace(3)* %in 133 %ext = zext i16 %a to i32 134 store i32 %ext, i32 addrspace(3)* %out 135 ret void 136} 137 138; FUNC-LABEL: {{^}}local_sextload_i16_to_i32: 139; GCN-NOT: s_wqm_b64 140 141; GFX9-NOT: m0 142; SICIVI: s_mov_b32 m0 143 144; GCN: ds_read_i16 145 146; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 147; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 148; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 149; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 150; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 151; EG: 16 152; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 153define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 154 %a = load i16, i16 addrspace(3)* %in 155 %ext = sext i16 %a to i32 156 store i32 %ext, i32 addrspace(3)* %out 157 ret void 158} 159 160; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32: 161; GFX9-NOT: m0 162; SICIVI: s_mov_b32 m0 163 164; GCN: ds_read_u16 165 166; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 167; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 168; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 169; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 170; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 171define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 172 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 173 %ext = zext <1 x i16> %load to <1 x i32> 174 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 175 ret void 176} 177 178; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32: 179; GFX9-NOT: m0 180; SICIVI: s_mov_b32 m0 181 182; GCN: ds_read_i16 183 184; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 185; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 186; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 187; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 188; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 189; EG: 16 190; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 191define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 192 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 193 %ext = sext <1 x i16> %load to <1 x i32> 194 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 195 ret void 196} 197 198; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32: 199; GCN-NOT: s_wqm_b64 200; GFX9-NOT: m0 201; SICIVI: s_mov_b32 m0 202 203; GCN: ds_read_b32 204 205; EG: LDS_READ_RET 206define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 207 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 208 %ext = zext <2 x i16> %load to <2 x i32> 209 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 210 ret void 211} 212 213; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32: 214; GCN-NOT: s_wqm_b64 215; GFX9-NOT: m0 216; SICIVI: s_mov_b32 m0 217 218; GCN: ds_read_b32 219 220; EG: LDS_READ_RET 221; EG: BFE_INT 222; EG: BFE_INT 223define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 224 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 225 %ext = sext <2 x i16> %load to <2 x i32> 226 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 227 ret void 228} 229 230; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32: 231; GFX9-NOT: m0 232; SICIVI: s_mov_b32 m0 233 234; GCN: ds_read_b64 235; GCN-DAG: ds_write_b32 236; GCN-DAG: ds_write_b64 237 238; EG: LDS_READ_RET 239define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 240entry: 241 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 242 %ext = zext <3 x i16> %ld to <3 x i32> 243 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 244 ret void 245} 246 247; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32: 248; GFX9-NOT: m0 249; SICIVI: s_mov_b32 m0 250 251; GCN: ds_read_b64 252; GCN-DAG: ds_write_b32 253; GCN-DAG: ds_write_b64 254 255; EG: LDS_READ_RET 256; EG-DAG: BFE_INT 257; EG-DAG: BFE_INT 258; EG-DAG: BFE_INT 259define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 260entry: 261 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 262 %ext = sext <3 x i16> %ld to <3 x i32> 263 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 264 ret void 265} 266 267; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32: 268; GCN-NOT: s_wqm_b64 269; GFX9-NOT: m0 270; SICIVI: s_mov_b32 m0 271 272; GCN: ds_read_b64 273 274; EG: LDS_READ_RET 275; EG: LDS_READ_RET 276define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 277 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 278 %ext = zext <4 x i16> %load to <4 x i32> 279 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 280 ret void 281} 282 283; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32: 284; GCN-NOT: s_wqm_b64 285; GFX9-NOT: m0 286; SICIVI: s_mov_b32 m0 287 288; GCN: ds_read_b64 289 290; EG: LDS_READ_RET 291; EG: LDS_READ_RET 292; EG-DAG: BFE_INT 293; EG-DAG: BFE_INT 294; EG-DAG: BFE_INT 295; EG-DAG: BFE_INT 296define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 297 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 298 %ext = sext <4 x i16> %load to <4 x i32> 299 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 300 ret void 301} 302 303; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: 304; GFX9-NOT: m0 305; SICIVI: s_mov_b32 m0 306 307; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 308 309; EG: LDS_READ_RET 310; EG: LDS_READ_RET 311; EG: LDS_READ_RET 312; EG: LDS_READ_RET 313define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 314 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 315 %ext = zext <8 x i16> %load to <8 x i32> 316 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 317 ret void 318} 319 320; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: 321; GFX9-NOT: m0 322; SICIVI: s_mov_b32 m0 323 324; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 325 326; EG: LDS_READ_RET 327; EG: LDS_READ_RET 328; EG: LDS_READ_RET 329; EG: LDS_READ_RET 330; EG-DAG: BFE_INT 331; EG-DAG: BFE_INT 332; EG-DAG: BFE_INT 333; EG-DAG: BFE_INT 334; EG-DAG: BFE_INT 335; EG-DAG: BFE_INT 336; EG-DAG: BFE_INT 337; EG-DAG: BFE_INT 338define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 339 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 340 %ext = sext <8 x i16> %load to <8 x i32> 341 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 342 ret void 343} 344 345; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: 346; GFX9-NOT: m0 347; SICIVI: s_mov_b32 m0 348 349; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 350; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 351 352; GCN: ds_write2_b64 353; GCN: ds_write2_b64 354; GCN: ds_write2_b64 355; GCN: ds_write2_b64 356 357; EG: LDS_READ_RET 358; EG: LDS_READ_RET 359; EG: LDS_READ_RET 360; EG: LDS_READ_RET 361; EG: LDS_READ_RET 362; EG: LDS_READ_RET 363; EG: LDS_READ_RET 364; EG: LDS_READ_RET 365define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 366 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 367 %ext = zext <16 x i16> %load to <16 x i32> 368 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 369 ret void 370} 371 372; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: 373; GFX9-NOT: m0 374; SICIVI: s_mov_b32 m0 375 376 377; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 378; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 379 380; EG: LDS_READ_RET 381; EG: LDS_READ_RET 382; EG: LDS_READ_RET 383; EG: LDS_READ_RET 384; EG: LDS_READ_RET 385; EG: LDS_READ_RET 386; EG: LDS_READ_RET 387; EG: LDS_READ_RET 388; EG-DAG: BFE_INT 389; EG-DAG: BFE_INT 390; EG-DAG: BFE_INT 391; EG-DAG: BFE_INT 392; EG-DAG: BFE_INT 393; EG-DAG: BFE_INT 394; EG-DAG: BFE_INT 395; EG-DAG: BFE_INT 396; EG-DAG: BFE_INT 397; EG-DAG: BFE_INT 398; EG-DAG: BFE_INT 399; EG-DAG: BFE_INT 400; EG-DAG: BFE_INT 401; EG-DAG: BFE_INT 402; EG-DAG: BFE_INT 403; EG-DAG: BFE_INT 404define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 405 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 406 %ext = sext <16 x i16> %load to <16 x i32> 407 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 408 ret void 409} 410 411; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: 412; GFX9-NOT: m0 413; SICIVI: s_mov_b32 m0 414 415; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 416; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 417; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 418; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 419 420; EG: LDS_READ_RET 421; EG: LDS_READ_RET 422; EG: LDS_READ_RET 423; EG: LDS_READ_RET 424; EG: LDS_READ_RET 425; EG: LDS_READ_RET 426; EG: LDS_READ_RET 427; EG: LDS_READ_RET 428; EG: LDS_READ_RET 429; EG: LDS_READ_RET 430; EG: LDS_READ_RET 431; EG: LDS_READ_RET 432; EG: LDS_READ_RET 433; EG: LDS_READ_RET 434; EG: LDS_READ_RET 435; EG: LDS_READ_RET 436define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 437 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 438 %ext = zext <32 x i16> %load to <32 x i32> 439 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 440 ret void 441} 442 443; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: 444; GFX9-NOT: m0 445; SICIVI: s_mov_b32 m0 446 447; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 448; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 449; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 450; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 451; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 452; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 453; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 454; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 455; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 456; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 457; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 458; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 459 460; EG: LDS_READ_RET 461; EG: LDS_READ_RET 462; EG: LDS_READ_RET 463; EG: LDS_READ_RET 464; EG: LDS_READ_RET 465; EG: LDS_READ_RET 466; EG: LDS_READ_RET 467; EG: LDS_READ_RET 468; EG: LDS_READ_RET 469; EG: LDS_READ_RET 470; EG: LDS_READ_RET 471; EG: LDS_READ_RET 472; EG: LDS_READ_RET 473; EG: LDS_READ_RET 474; EG: LDS_READ_RET 475; EG: LDS_READ_RET 476define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 477 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 478 %ext = sext <32 x i16> %load to <32 x i32> 479 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 480 ret void 481} 482 483; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: 484; GFX9-NOT: m0 485; SICIVI: s_mov_b32 m0 486 487; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 488; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 489; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 490; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 491; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 492; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 493; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 494; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 495; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 496; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 497; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27 498; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25 499; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23 500; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21 501; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19 502; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17 503; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 504; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 505; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 506; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 507; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 508; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 509; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 510; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 511 512; EG: LDS_READ_RET 513; EG: LDS_READ_RET 514; EG: LDS_READ_RET 515; EG: LDS_READ_RET 516; EG: LDS_READ_RET 517; EG: LDS_READ_RET 518; EG: LDS_READ_RET 519; EG: LDS_READ_RET 520; EG: LDS_READ_RET 521; EG: LDS_READ_RET 522; EG: LDS_READ_RET 523; EG: LDS_READ_RET 524; EG: LDS_READ_RET 525; EG: LDS_READ_RET 526; EG: LDS_READ_RET 527; EG: LDS_READ_RET 528; EG: LDS_READ_RET 529; EG: LDS_READ_RET 530; EG: LDS_READ_RET 531; EG: LDS_READ_RET 532; EG: LDS_READ_RET 533; EG: LDS_READ_RET 534; EG: LDS_READ_RET 535; EG: LDS_READ_RET 536; EG: LDS_READ_RET 537; EG: LDS_READ_RET 538; EG: LDS_READ_RET 539; EG: LDS_READ_RET 540; EG: LDS_READ_RET 541; EG: LDS_READ_RET 542; EG: LDS_READ_RET 543; EG: LDS_READ_RET 544define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 545 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 546 %ext = zext <64 x i16> %load to <64 x i32> 547 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 548 ret void 549} 550 551; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: 552; GFX9-NOT: m0 553; SICIVI: s_mov_b32 m0 554 555; EG: LDS_READ_RET 556; EG: LDS_READ_RET 557; EG: LDS_READ_RET 558; EG: LDS_READ_RET 559; EG: LDS_READ_RET 560; EG: LDS_READ_RET 561; EG: LDS_READ_RET 562; EG: LDS_READ_RET 563; EG: LDS_READ_RET 564; EG: LDS_READ_RET 565; EG: LDS_READ_RET 566; EG: LDS_READ_RET 567; EG: LDS_READ_RET 568; EG: LDS_READ_RET 569; EG: LDS_READ_RET 570; EG: LDS_READ_RET 571; EG: LDS_READ_RET 572; EG: LDS_READ_RET 573; EG: LDS_READ_RET 574; EG: LDS_READ_RET 575; EG: LDS_READ_RET 576; EG: LDS_READ_RET 577; EG: LDS_READ_RET 578; EG: LDS_READ_RET 579; EG: LDS_READ_RET 580; EG: LDS_READ_RET 581; EG: LDS_READ_RET 582; EG: LDS_READ_RET 583; EG: LDS_READ_RET 584; EG: LDS_READ_RET 585; EG: LDS_READ_RET 586; EG: LDS_READ_RET 587define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 588 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 589 %ext = sext <64 x i16> %load to <64 x i32> 590 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 591 ret void 592} 593 594; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: 595; GFX9-NOT: m0 596; SICIVI: s_mov_b32 m0 597 598; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], 599; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 600 601; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 602 603; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 604; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 605; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 606; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 607; EG-DAG: LDS_WRITE 608define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 609 %a = load i16, i16 addrspace(3)* %in 610 %ext = zext i16 %a to i64 611 store i64 %ext, i64 addrspace(3)* %out 612 ret void 613} 614 615; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: 616; GFX9-NOT: m0 617; SICIVI: s_mov_b32 m0 618 619; FIXME: Need to optimize this sequence to avoid an extra shift. 620; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32 621; t28: i64 = any_extend t25 622; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16 623; SI: ds_read_i16 v[[LO:[0-9]+]], 624; GFX89: ds_read_u16 v[[ULO:[0-9]+]] 625; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 626; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] 627 628; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 629 630; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 631; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 632; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 633; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 634; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 635; EG-DAG: LDS_WRITE 636; EG-DAG: 16 637; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 638define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 639 %a = load i16, i16 addrspace(3)* %in 640 %ext = sext i16 %a to i64 641 store i64 %ext, i64 addrspace(3)* %out 642 ret void 643} 644 645; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: 646; GFX9-NOT: m0 647; SICIVI: s_mov_b32 m0 648 649 650; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 651; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 652; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 653; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 654; EG-DAG: LDS_WRITE 655define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 656 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 657 %ext = zext <1 x i16> %load to <1 x i64> 658 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 659 ret void 660} 661 662; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: 663; GFX9-NOT: m0 664; SICIVI: s_mov_b32 m0 665 666 667; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 668; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 669; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 670; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 671; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 672; EG-DAG: LDS_WRITE 673; EG-DAG: 16 674; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 675define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 676 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 677 %ext = sext <1 x i16> %load to <1 x i64> 678 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 679 ret void 680} 681 682; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64: 683; GFX9-NOT: m0 684; SICIVI: s_mov_b32 m0 685 686 687; EG: LDS_READ_RET 688define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 689 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 690 %ext = zext <2 x i16> %load to <2 x i64> 691 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 692 ret void 693} 694 695; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: 696; GFX9-NOT: m0 697; SICIVI: s_mov_b32 m0 698 699 700; EG: LDS_READ_RET 701; EG-DAG: BFE_INT 702; EG-DAG: ASHR 703define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 704 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 705 %ext = sext <2 x i16> %load to <2 x i64> 706 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 707 ret void 708} 709 710; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: 711; GFX9-NOT: m0 712; SICIVI: s_mov_b32 m0 713 714 715; EG: LDS_READ_RET 716; EG: LDS_READ_RET 717define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 718 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 719 %ext = zext <4 x i16> %load to <4 x i64> 720 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 721 ret void 722} 723 724; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: 725; GFX9-NOT: m0 726; SICIVI: s_mov_b32 m0 727 728 729; EG: LDS_READ_RET 730; EG: LDS_READ_RET 731; EG-DAG: BFE_INT 732; EG-DAG: BFE_INT 733; EG-DAG: ASHR 734; EG-DAG: ASHR 735define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 736 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 737 %ext = sext <4 x i16> %load to <4 x i64> 738 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 739 ret void 740} 741 742; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: 743; GFX9-NOT: m0 744; SICIVI: s_mov_b32 m0 745 746 747; EG: LDS_READ_RET 748; EG: LDS_READ_RET 749; EG: LDS_READ_RET 750; EG: LDS_READ_RET 751define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 752 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 753 %ext = zext <8 x i16> %load to <8 x i64> 754 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 755 ret void 756} 757 758; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: 759; GFX9-NOT: m0 760; SICIVI: s_mov_b32 m0 761 762 763; EG: LDS_READ_RET 764; EG: LDS_READ_RET 765; EG: LDS_READ_RET 766; EG: LDS_READ_RET 767; EG-DAG: BFE_INT 768; EG-DAG: BFE_INT 769; EG-DAG: ASHR 770; EG-DAG: ASHR 771; EG-DAG: BFE_INT 772; EG-DAG: BFE_INT 773; EG-DAG: ASHR 774; EG-DAG: ASHR 775define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 776 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 777 %ext = sext <8 x i16> %load to <8 x i64> 778 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 779 ret void 780} 781 782; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: 783; GFX9-NOT: m0 784; SICIVI: s_mov_b32 m0 785 786 787; EG: LDS_READ_RET 788; EG: LDS_READ_RET 789; EG: LDS_READ_RET 790; EG: LDS_READ_RET 791; EG: LDS_READ_RET 792; EG: LDS_READ_RET 793; EG: LDS_READ_RET 794; EG: LDS_READ_RET 795define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 796 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 797 %ext = zext <16 x i16> %load to <16 x i64> 798 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 799 ret void 800} 801 802; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: 803; GFX9-NOT: m0 804; SICIVI: s_mov_b32 m0 805 806 807; EG: LDS_READ_RET 808; EG: LDS_READ_RET 809; EG: LDS_READ_RET 810; EG: LDS_READ_RET 811; EG: LDS_READ_RET 812; EG: LDS_READ_RET 813; EG: LDS_READ_RET 814; EG: LDS_READ_RET 815; EG-DAG: BFE_INT 816; EG-DAG: BFE_INT 817; EG-DAG: ASHR 818; EG-DAG: ASHR 819; EG-DAG: BFE_INT 820; EG-DAG: BFE_INT 821; EG-DAG: ASHR 822; EG-DAG: ASHR 823; EG-DAG: BFE_INT 824; EG-DAG: BFE_INT 825; EG-DAG: ASHR 826; EG-DAG: ASHR 827; EG-DAG: BFE_INT 828; EG-DAG: BFE_INT 829; EG-DAG: ASHR 830; EG-DAG: ASHR 831define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 832 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 833 %ext = sext <16 x i16> %load to <16 x i64> 834 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 835 ret void 836} 837 838; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: 839; GFX9-NOT: m0 840; SICIVI: s_mov_b32 m0 841 842 843; EG: LDS_READ_RET 844; EG: LDS_READ_RET 845; EG: LDS_READ_RET 846; EG: LDS_READ_RET 847; EG: LDS_READ_RET 848; EG: LDS_READ_RET 849; EG: LDS_READ_RET 850; EG: LDS_READ_RET 851; EG: LDS_READ_RET 852; EG: LDS_READ_RET 853; EG: LDS_READ_RET 854; EG: LDS_READ_RET 855; EG: LDS_READ_RET 856; EG: LDS_READ_RET 857; EG: LDS_READ_RET 858; EG: LDS_READ_RET 859define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 860 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 861 %ext = zext <32 x i16> %load to <32 x i64> 862 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 863 ret void 864} 865 866; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: 867; GFX9-NOT: m0 868; SICIVI: s_mov_b32 m0 869 870 871; EG: LDS_READ_RET 872; EG: LDS_READ_RET 873; EG: LDS_READ_RET 874; EG: LDS_READ_RET 875; EG: LDS_READ_RET 876; EG: LDS_READ_RET 877; EG: LDS_READ_RET 878; EG: LDS_READ_RET 879; EG: LDS_READ_RET 880; EG: LDS_READ_RET 881; EG: LDS_READ_RET 882; EG: LDS_READ_RET 883; EG: LDS_READ_RET 884; EG: LDS_READ_RET 885; EG: LDS_READ_RET 886; EG: LDS_READ_RET 887; EG-DAG: BFE_INT 888; EG-DAG: BFE_INT 889; EG-DAG: ASHR 890; EG-DAG: ASHR 891; EG-DAG: BFE_INT 892; EG-DAG: BFE_INT 893; EG-DAG: ASHR 894; EG-DAG: ASHR 895; EG-DAG: BFE_INT 896; EG-DAG: BFE_INT 897; EG-DAG: ASHR 898; EG-DAG: ASHR 899; EG-DAG: BFE_INT 900; EG-DAG: BFE_INT 901; EG-DAG: ASHR 902; EG-DAG: ASHR 903; EG-DAG: BFE_INT 904; EG-DAG: BFE_INT 905; EG-DAG: ASHR 906; EG-DAG: ASHR 907; EG-DAG: BFE_INT 908; EG-DAG: BFE_INT 909; EG-DAG: ASHR 910; EG-DAG: ASHR 911; EG-DAG: BFE_INT 912; EG-DAG: BFE_INT 913; EG-DAG: ASHR 914; EG-DAG: ASHR 915; EG-DAG: BFE_INT 916; EG-DAG: BFE_INT 917; EG-DAG: ASHR 918; EG-DAG: ASHR 919define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 920 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 921 %ext = sext <32 x i16> %load to <32 x i64> 922 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 923 ret void 924} 925 926; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64: 927; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 928; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 929; %ext = zext <64 x i16> %load to <64 x i64> 930; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 931; ret void 932; } 933 934; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64: 935; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 936; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 937; %ext = sext <64 x i16> %load to <64 x i64> 938; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 939; ret void 940; } 941 942; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 943; FUNC-LABEL: {{^}}local_v8i16_to_128: 944 945; SI-NOT: ds_read_b128 946; SI-NOT: ds_write_b128 947 948; CIVI: ds_read_b128 949; CIVI: ds_write_b128 950 951; EG: LDS_READ_RET 952; EG: LDS_READ_RET 953; EG: LDS_READ_RET 954; EG: LDS_READ_RET 955define amdgpu_kernel void @local_v8i16_to_128(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { 956 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in, align 16 957 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out, align 16 958 ret void 959} 960 961attributes #0 = { nounwind } 962