1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 3; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 4; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s 5; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s 6 7; Testing for ds_read/write_b128 8; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 9; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 10 11; FUNC-LABEL: {{^}}local_load_i64: 12; SICIVI: s_mov_b32 m0 13; GFX9-NOT: m0 14 15; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} 16; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] 17 18; EG: LDS_READ_RET 19; EG: LDS_READ_RET 20define amdgpu_kernel void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 { 21 %ld = load i64, i64 addrspace(3)* %in 22 store i64 %ld, i64 addrspace(3)* %out 23 ret void 24} 25 26; FUNC-LABEL: {{^}}local_load_v2i64: 27; SICIVI: s_mov_b32 m0 28; GFX9-NOT: m0 29 30; GCN: ds_read2_b64 31 32; EG: LDS_READ_RET 33; EG: LDS_READ_RET 34; EG: LDS_READ_RET 35; EG: LDS_READ_RET 36define amdgpu_kernel void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 { 37entry: 38 %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in 39 store <2 x i64> %ld, <2 x i64> addrspace(3)* %out 40 ret void 41} 42 43; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 44; FUNC-LABEL: {{^}}local_load_v2i64_to_128: 45 46; CIVI: ds_read_b128 47; CIVI: ds_write_b128 48 49define amdgpu_kernel void @local_load_v2i64_to_128(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) { 50entry: 51 %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in, align 16 52 store <2 x i64> %ld, <2 x i64> addrspace(3)* %out, align 16 53 ret void 54} 55 56; FUNC-LABEL: {{^}}local_load_v3i64: 57; SICIVI: s_mov_b32 m0 58; GFX9-NOT: m0 59 60; GCN-DAG: ds_read2_b64 61; GCN-DAG: ds_read_b64 62 63; EG: LDS_READ_RET 64; EG: LDS_READ_RET 65; EG: LDS_READ_RET 66; EG: LDS_READ_RET 67; EG: LDS_READ_RET 68; EG: LDS_READ_RET 69define amdgpu_kernel void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 { 70entry: 71 %ld = load <3 x i64>, <3 x i64> addrspace(3)* %in 72 store <3 x i64> %ld, <3 x i64> addrspace(3)* %out 73 ret void 74} 75 76; FUNC-LABEL: {{^}}local_load_v4i64: 77; SICIVI: s_mov_b32 m0 78; GFX9-NOT: m0 79 80; GCN: ds_read2_b64 81; GCN: ds_read2_b64 82 83; EG: LDS_READ_RET 84; EG: LDS_READ_RET 85; EG: LDS_READ_RET 86; EG: LDS_READ_RET 87 88; EG: LDS_READ_RET 89; EG: LDS_READ_RET 90; EG: LDS_READ_RET 91; EG: LDS_READ_RET 92define amdgpu_kernel void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 { 93entry: 94 %ld = load <4 x i64>, <4 x i64> addrspace(3)* %in 95 store <4 x i64> %ld, <4 x i64> addrspace(3)* %out 96 ret void 97} 98 99; FUNC-LABEL: {{^}}local_load_v8i64: 100; SICIVI: s_mov_b32 m0 101; GFX9-NOT: m0 102 103; GCN: ds_read2_b64 104; GCN: ds_read2_b64 105; GCN: ds_read2_b64 106; GCN: ds_read2_b64 107 108; EG: LDS_READ_RET 109; EG: LDS_READ_RET 110; EG: LDS_READ_RET 111; EG: LDS_READ_RET 112; EG: LDS_READ_RET 113; EG: LDS_READ_RET 114; EG: LDS_READ_RET 115; EG: LDS_READ_RET 116; EG: LDS_READ_RET 117; EG: LDS_READ_RET 118; EG: LDS_READ_RET 119; EG: LDS_READ_RET 120; EG: LDS_READ_RET 121; EG: LDS_READ_RET 122; EG: LDS_READ_RET 123; EG: LDS_READ_RET 124define amdgpu_kernel void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 { 125entry: 126 %ld = load <8 x i64>, <8 x i64> addrspace(3)* %in 127 store <8 x i64> %ld, <8 x i64> addrspace(3)* %out 128 ret void 129} 130 131; FUNC-LABEL: {{^}}local_load_v16i64: 132; SICIVI: s_mov_b32 m0 133; GFX9-NOT: m0 134 135; GCN: ds_read2_b64 136; GCN: ds_read2_b64 137; GCN: ds_read2_b64 138; GCN: ds_read2_b64 139; GCN: ds_read2_b64 140; GCN: ds_read2_b64 141; GCN: ds_read2_b64 142; GCN: ds_read2_b64 143 144; EG: LDS_READ_RET 145; EG: LDS_READ_RET 146; EG: LDS_READ_RET 147; EG: LDS_READ_RET 148 149; EG: LDS_READ_RET 150; EG: LDS_READ_RET 151; EG: LDS_READ_RET 152; EG: LDS_READ_RET 153 154; EG: LDS_READ_RET 155; EG: LDS_READ_RET 156; EG: LDS_READ_RET 157; EG: LDS_READ_RET 158 159; EG: LDS_READ_RET 160; EG: LDS_READ_RET 161; EG: LDS_READ_RET 162; EG: LDS_READ_RET 163 164; EG: LDS_READ_RET 165; EG: LDS_READ_RET 166; EG: LDS_READ_RET 167; EG: LDS_READ_RET 168 169; EG: LDS_READ_RET 170; EG: LDS_READ_RET 171; EG: LDS_READ_RET 172; EG: LDS_READ_RET 173 174; EG: LDS_READ_RET 175; EG: LDS_READ_RET 176; EG: LDS_READ_RET 177; EG: LDS_READ_RET 178 179; EG: LDS_READ_RET 180; EG: LDS_READ_RET 181; EG: LDS_READ_RET 182; EG: LDS_READ_RET 183define amdgpu_kernel void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 { 184entry: 185 %ld = load <16 x i64>, <16 x i64> addrspace(3)* %in 186 store <16 x i64> %ld, <16 x i64> addrspace(3)* %out 187 ret void 188} 189 190attributes #0 = { nounwind } 191