1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 3; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 4; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s 5; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s 6 7; Testing for ds_read_b128 8; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 9; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 10 11; FUNC-LABEL: {{^}}local_load_f64: 12; SICIV: s_mov_b32 m0 13; GFX9-NOT: m0 14 15; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} 16; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] 17 18; EG: LDS_READ_RET 19; EG: LDS_READ_RET 20define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 { 21 %ld = load double, double addrspace(3)* %in 22 store double %ld, double addrspace(3)* %out 23 ret void 24} 25 26; FUNC-LABEL: {{^}}local_load_v2f64: 27; SICIV: s_mov_b32 m0 28; GFX9-NOT: m0 29 30; GCN: ds_read2_b64 31 32; EG: LDS_READ_RET 33; EG: LDS_READ_RET 34; EG: LDS_READ_RET 35; EG: LDS_READ_RET 36define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 { 37entry: 38 %ld = load <2 x double>, <2 x double> addrspace(3)* %in 39 store <2 x double> %ld, <2 x double> addrspace(3)* %out 40 ret void 41} 42 43; FUNC-LABEL: {{^}}local_load_v3f64: 44; SICIV: s_mov_b32 m0 45; GFX9-NOT: m0 46 47; GCN-DAG: ds_read2_b64 48; GCN-DAG: ds_read_b64 49 50; EG: LDS_READ_RET 51; EG: LDS_READ_RET 52; EG: LDS_READ_RET 53; EG: LDS_READ_RET 54; EG: LDS_READ_RET 55; EG: LDS_READ_RET 56define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 { 57entry: 58 %ld = load <3 x double>, <3 x double> addrspace(3)* %in 59 store <3 x double> %ld, <3 x double> addrspace(3)* %out 60 ret void 61} 62 63; FUNC-LABEL: {{^}}local_load_v4f64: 64; SICIV: s_mov_b32 m0 65; GFX9-NOT: m0 66 67; GCN: ds_read2_b64 68; GCN: ds_read2_b64 69 70; EG: LDS_READ_RET 71; EG: LDS_READ_RET 72; EG: LDS_READ_RET 73; EG: LDS_READ_RET 74 75; EG: LDS_READ_RET 76; EG: LDS_READ_RET 77; EG: LDS_READ_RET 78; EG: LDS_READ_RET 79define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 { 80entry: 81 %ld = load <4 x double>, <4 x double> addrspace(3)* %in 82 store <4 x double> %ld, <4 x double> addrspace(3)* %out 83 ret void 84} 85 86; FUNC-LABEL: {{^}}local_load_v8f64: 87; SICIV: s_mov_b32 m0 88; GFX9-NOT: m0 89 90; GCN: ds_read2_b64 91; GCN: ds_read2_b64 92; GCN: ds_read2_b64 93; GCN: ds_read2_b64 94 95; EG: LDS_READ_RET 96; EG: LDS_READ_RET 97; EG: LDS_READ_RET 98; EG: LDS_READ_RET 99; EG: LDS_READ_RET 100; EG: LDS_READ_RET 101; EG: LDS_READ_RET 102; EG: LDS_READ_RET 103; EG: LDS_READ_RET 104; EG: LDS_READ_RET 105; EG: LDS_READ_RET 106; EG: LDS_READ_RET 107; EG: LDS_READ_RET 108; EG: LDS_READ_RET 109; EG: LDS_READ_RET 110; EG: LDS_READ_RET 111define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 { 112entry: 113 %ld = load <8 x double>, <8 x double> addrspace(3)* %in 114 store <8 x double> %ld, <8 x double> addrspace(3)* %out 115 ret void 116} 117 118; FUNC-LABEL: {{^}}local_load_v16f64: 119; SICIV: s_mov_b32 m0 120; GFX9-NOT: m0 121 122; GCN: ds_read2_b64 123; GCN: ds_read2_b64 124; GCN: ds_read2_b64 125; GCN: ds_read2_b64 126; GCN: ds_read2_b64 127; GCN: ds_read2_b64 128; GCN: ds_read2_b64 129; GCN: ds_read2_b64 130 131; EG: LDS_READ_RET 132; EG: LDS_READ_RET 133; EG: LDS_READ_RET 134; EG: LDS_READ_RET 135 136; EG: LDS_READ_RET 137; EG: LDS_READ_RET 138; EG: LDS_READ_RET 139; EG: LDS_READ_RET 140 141; EG: LDS_READ_RET 142; EG: LDS_READ_RET 143; EG: LDS_READ_RET 144; EG: LDS_READ_RET 145 146; EG: LDS_READ_RET 147; EG: LDS_READ_RET 148; EG: LDS_READ_RET 149; EG: LDS_READ_RET 150 151; EG: LDS_READ_RET 152; EG: LDS_READ_RET 153; EG: LDS_READ_RET 154; EG: LDS_READ_RET 155 156; EG: LDS_READ_RET 157; EG: LDS_READ_RET 158; EG: LDS_READ_RET 159; EG: LDS_READ_RET 160 161; EG: LDS_READ_RET 162; EG: LDS_READ_RET 163; EG: LDS_READ_RET 164; EG: LDS_READ_RET 165 166; EG: LDS_READ_RET 167; EG: LDS_READ_RET 168; EG: LDS_READ_RET 169; EG: LDS_READ_RET 170define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 { 171entry: 172 %ld = load <16 x double>, <16 x double> addrspace(3)* %in 173 store <16 x double> %ld, <16 x double> addrspace(3)* %out 174 ret void 175} 176 177; Tests if ds_read_b128 gets generated for the 16 byte aligned load. 178; FUNC-LABEL: {{^}}local_load_v2f64_to_128: 179 180; CIVI: ds_read_b128 181; CIVI: ds_write_b128 182 183; EG: LDS_READ_RET 184; EG: LDS_READ_RET 185; EG: LDS_READ_RET 186; EG: LDS_READ_RET 187define amdgpu_kernel void @local_load_v2f64_to_128(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) { 188entry: 189 %ld = load <2 x double>, <2 x double> addrspace(3)* %in, align 16 190 store <2 x double> %ld, <2 x double> addrspace(3)* %out, align 16 191 ret void 192} 193 194attributes #0 = { nounwind } 195