1; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
3; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
4
5; FUNC-LABEL: {{^}}global_load_f64:
6; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
7; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]
8
9; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
10; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
; Copies one double from %in to %out; both pointers are in addrspace(1).
define amdgpu_kernel void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
bb:
  ; Load the scalar double through the input pointer.
  %scalar = load double, double addrspace(1)* %in
  ; Store the loaded value through the output pointer.
  store double %scalar, double addrspace(1)* %out
  ret void
}
16
17; FUNC-LABEL: {{^}}global_load_v2f64:
18; GCN-NOHSA: buffer_load_dwordx4
19; GCN-HSA: flat_load_dwordx4
; Copies a <2 x double> vector from %in to %out; both pointers are in
; addrspace(1).
define amdgpu_kernel void @global_load_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
bb:
  ; Load the whole two-element vector in one IR load.
  %vec2 = load <2 x double>, <2 x double> addrspace(1)* %in
  ; Store the loaded vector through the output pointer.
  store <2 x double> %vec2, <2 x double> addrspace(1)* %out
  ret void
}
26
27; FUNC-LABEL: {{^}}global_load_v3f64:
28; GCN-NOHSA: buffer_load_dwordx4
29; GCN-NOHSA: buffer_load_dwordx4
30; GCN-HSA: flat_load_dwordx4
31; GCN-HSA: flat_load_dwordx4
; Copies a <3 x double> vector from %in to %out; both pointers are in
; addrspace(1). The element count is not a power of two.
define amdgpu_kernel void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
bb:
  ; Load the whole three-element vector in one IR load.
  %vec3 = load <3 x double>, <3 x double> addrspace(1)* %in
  ; Store the loaded vector through the output pointer.
  store <3 x double> %vec3, <3 x double> addrspace(1)* %out
  ret void
}
38
39; FUNC-LABEL: {{^}}global_load_v4f64:
40; GCN-NOHSA: buffer_load_dwordx4
41; GCN-NOHSA: buffer_load_dwordx4
42
43; GCN-HSA: flat_load_dwordx4
44; GCN-HSA: flat_load_dwordx4
; Copies a <4 x double> vector from %in to %out; both pointers are in
; addrspace(1).
define amdgpu_kernel void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
bb:
  ; Load the whole four-element vector in one IR load.
  %vec4 = load <4 x double>, <4 x double> addrspace(1)* %in
  ; Store the loaded vector through the output pointer.
  store <4 x double> %vec4, <4 x double> addrspace(1)* %out
  ret void
}
51
52; FUNC-LABEL: {{^}}global_load_v8f64:
53; GCN-NOHSA: buffer_load_dwordx4
54; GCN-NOHSA: buffer_load_dwordx4
55; GCN-NOHSA: buffer_load_dwordx4
56; GCN-NOHSA: buffer_load_dwordx4
57
58; GCN-HSA: flat_load_dwordx4
59; GCN-HSA: flat_load_dwordx4
60; GCN-HSA: flat_load_dwordx4
61; GCN-HSA: flat_load_dwordx4
; Copies an <8 x double> vector from %in to %out; both pointers are in
; addrspace(1).
define amdgpu_kernel void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
bb:
  ; Load the whole eight-element vector in one IR load.
  %vec8 = load <8 x double>, <8 x double> addrspace(1)* %in
  ; Store the loaded vector through the output pointer.
  store <8 x double> %vec8, <8 x double> addrspace(1)* %out
  ret void
}
68
69; FUNC-LABEL: {{^}}global_load_v16f64:
70; GCN-NOHSA: buffer_load_dwordx4
71; GCN-NOHSA: buffer_load_dwordx4
72; GCN-NOHSA: buffer_load_dwordx4
73; GCN-NOHSA: buffer_load_dwordx4
74; GCN-NOHSA: buffer_load_dwordx4
75; GCN-NOHSA: buffer_load_dwordx4
76; GCN-NOHSA: buffer_load_dwordx4
77; GCN-NOHSA: buffer_load_dwordx4
78
79; GCN-HSA: flat_load_dwordx4
80; GCN-HSA: flat_load_dwordx4
81; GCN-HSA: flat_load_dwordx4
82; GCN-HSA: flat_load_dwordx4
83; GCN-HSA: flat_load_dwordx4
84; GCN-HSA: flat_load_dwordx4
85; GCN-HSA: flat_load_dwordx4
86; GCN-HSA: flat_load_dwordx4
; Copies a <16 x double> vector from %in to %out; both pointers are in
; addrspace(1).
define amdgpu_kernel void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
bb:
  ; Load the whole sixteen-element vector in one IR load.
  %vec16 = load <16 x double>, <16 x double> addrspace(1)* %in
  ; Store the loaded vector through the output pointer.
  store <16 x double> %vec16, <16 x double> addrspace(1)* %out
  ret void
}
93
; Attribute group #0, referenced by the kernel definitions in this file.
attributes #0 = { nounwind }
95