; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s

; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Scalar case: one i64 is loaded through %in and stored through %out.
;
; Expectations, per check prefix:
;  * non-HSA amdgcn runs: buffer_load_dwordx2 defines a v[lo:hi] register
;    range (captured as VAL) and buffer_store_dwordx2 must use that same range.
;  * HSA run: flat_load_dwordx2 defines VAL; flat_store_dwordx2 takes some
;    other v[lo:hi] range as its first operand (presumably the address --
;    confirm against the flat instruction syntax) followed by VAL.
;  * r600 runs: a single VTX_READ_64.
; FUNC-LABEL: {{^}}global_load_i64:
; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]

; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  %ld = load i64, i64 addrspace(1)* %in
  store i64 %ld, i64 addrspace(1)* %out
  ret void
}

; <2 x i64> case: the whole vector is loaded through %in and stored through
; %out. Each target is expected to emit one 4-dword / 128-bit load
; (buffer_load_dwordx4, flat_load_dwordx4 or VTX_READ_128 depending on the
; run). The stores are not checked.
; FUNC-LABEL: {{^}}global_load_v2i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
  store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
  ret void
}

; <3 x i64> case: a non-power-of-two vector is loaded through %in and stored
; through %out. The checks require two 4-dword / 128-bit loads on every
; target, i.e. the same load count as the <4 x i64> kernel in this file.
; NOTE(review): the second load presumably reads past the third element;
; confirm this is still the intended lowering. The stores are not checked.
; FUNC-LABEL: {{^}}global_load_v3i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i64>, <3 x i64> addrspace(1)* %in
  store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
  ret void
}

; <4 x i64> case: the vector is loaded through %in and stored through %out.
; The checks require two 4-dword / 128-bit loads on every target
; (buffer_load_dwordx4, flat_load_dwordx4 or VTX_READ_128 depending on the
; run). The stores are not checked.
; FUNC-LABEL: {{^}}global_load_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i64>, <4 x i64> addrspace(1)* %in
  store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
  ret void
}

; <8 x i64> case: the vector is loaded through %in and stored through %out.
; The checks require four 4-dword / 128-bit loads on every target
; (buffer_load_dwordx4, flat_load_dwordx4 or VTX_READ_128 depending on the
; run). The stores are not checked.
; FUNC-LABEL: {{^}}global_load_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i64>, <8 x i64> addrspace(1)* %in
  store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
  ret void
}

; <16 x i64> case: the vector is loaded through %in and stored through %out.
; The checks require eight 4-dword / 128-bit loads on every target
; (buffer_load_dwordx4, flat_load_dwordx4 or VTX_READ_128 depending on the
; run). The stores are not checked.
; FUNC-LABEL: {{^}}global_load_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i64>, <16 x i64> addrspace(1)* %in
  store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
  ret void
}

; Attribute group #0, referenced by the kernel definitions in this test.
attributes #0 = { nounwind }