; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s

; Scalar i32 add: loads an i32 from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 1
; for the add instruction.
; CHECK: 'add_i32'
; CHECK: estimated cost of 1 for {{.*}} add i32
define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %add = add i32 %vec, %b
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 2
; for the add instruction.
; CHECK: 'add_v2i32'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i32>
define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
  %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
  %add = add <2 x i32> %vec, %b
  store <2 x i32> %add, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i32> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 3
; for the add instruction.
; CHECK: 'add_v3i32'
; CHECK: estimated cost of 3 for {{.*}} add <3 x i32>
define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
  %add = add <3 x i32> %vec, %b
  store <3 x i32> %add, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 4
; for the add instruction.
; CHECK: 'add_v4i32'
; CHECK: estimated cost of 4 for {{.*}} add <4 x i32>
define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
  %add = add <4 x i32> %vec, %b
  store <4 x i32> %add, <4 x i32> addrspace(1)* %out
  ret void
}

; Scalar i64 add: loads an i64 from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 2
; for the add instruction; the same expectation is applied to both the
; +half-rate-64-ops and -half-rate-64-ops runs.
; CHECK: 'add_i64'
; CHECK: estimated cost of 2 for {{.*}} add i64
define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %add = add i64 %vec, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; <2 x i64> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 4
; for the add instruction.
; CHECK: 'add_v2i64'
; CHECK: estimated cost of 4 for {{.*}} add <2 x i64>
define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
  %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
  %add = add <2 x i64> %vec, %b
  store <2 x i64> %add, <2 x i64> addrspace(1)* %out
  ret void
}

; <3 x i64> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 6
; for the add instruction.
; CHECK: 'add_v3i64'
; CHECK: estimated cost of 6 for {{.*}} add <3 x i64>
define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
  %add = add <3 x i64> %vec, %b
  store <3 x i64> %add, <3 x i64> addrspace(1)* %out
  ret void
}

; <4 x i64> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 8
; for the add instruction.
; CHECK: 'add_v4i64'
; CHECK: estimated cost of 8 for {{.*}} add <4 x i64>
define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
  %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
  %add = add <4 x i64> %vec, %b
  store <4 x i64> %add, <4 x i64> addrspace(1)* %out
  ret void
}

; <16 x i64> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 32
; for the add instruction.
; CHECK: 'add_v16i64'
; CHECK: estimated cost of 32 for {{.*}} add <16 x i64>
define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
  %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
  %add = add <16 x i64> %vec, %b
  store <16 x i64> %add, <16 x i64> addrspace(1)* %out
  ret void
}

; Scalar i16 add: loads an i16 from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 1
; for the add instruction.
; CHECK: 'add_i16'
; CHECK: estimated cost of 1 for {{.*}} add i16
define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %add = add i16 %vec, %b
  store i16 %add, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> add: loads a vector from %vaddr, adds the kernel argument %b and
; stores the sum to %out. The cost model is expected to report a cost of 2
; for the add instruction.
; CHECK: 'add_v2i16'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i16>
define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %add = add <2 x i16> %vec, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; Scalar i32 sub: loads an i32 from %vaddr, subtracts the kernel argument %b
; from it and stores the difference to %out. The cost model is expected to
; report a cost of 1 for the sub instruction.
; CHECK: 'sub_i32'
; CHECK: estimated cost of 1 for {{.*}} sub i32
define amdgpu_kernel void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %sub = sub i32 %vec, %b
  store i32 %sub, i32 addrspace(1)* %out
  ret void
}

; Scalar i64 sub: loads an i64 from %vaddr, subtracts the kernel argument %b
; from it and stores the difference to %out. The cost model is expected to
; report a cost of 2 for the sub instruction; the same expectation is applied
; to both the +half-rate-64-ops and -half-rate-64-ops runs.
; CHECK: 'sub_i64'
; CHECK: estimated cost of 2 for {{.*}} sub i64
define amdgpu_kernel void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %sub = sub i64 %vec, %b
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; Scalar i16 sub: loads an i16 from %vaddr, subtracts the kernel argument %b
; from it and stores the difference to %out. The cost model is expected to
; report a cost of 1 for the sub instruction.
; CHECK: 'sub_i16'
; CHECK: estimated cost of 1 for {{.*}} sub i16
define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %sub = sub i16 %vec, %b
  store i16 %sub, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> sub: loads a vector from %vaddr, subtracts the kernel argument %b
; from it and stores the difference to %out. The cost model is expected to
; report a cost of 2 for the sub instruction.
; CHECK: 'sub_v2i16'
; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16>
define amdgpu_kernel void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %sub = sub <2 x i16> %vec, %b
  store <2 x i16> %sub, <2 x i16> addrspace(1)* %out
  ret void
}

; Attribute group #0, referenced by the kernels above: marks them nounwind.
attributes #0 = { nounwind }