1; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
2; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s
3; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s
4
5; GCN: 'extractelement_v2i32'
6; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32>
7define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
  ; Load a <2 x i32> through %vaddr, extract element 1 using a constant
  ; index, and store the resulting i32 through %out.
8  %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
9  %elt = extractelement <2 x i32> %vec, i32 1
10  store i32 %elt, i32 addrspace(1)* %out
11  ret void
12}
13
14; GCN: 'extractelement_v2f32'
15; GCN: estimated cost of 0 for {{.*}} extractelement <2 x float>
16define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
  ; Floating-point counterpart of the <2 x i32> case: load a <2 x float>
  ; through %vaddr, extract element 1 (constant index), store through %out.
17  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
18  %elt = extractelement <2 x float> %vec, i32 1
19  store float %elt, float addrspace(1)* %out
20  ret void
21}
22
23; GCN: 'extractelement_v3i32'
24; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i32>
25define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
  ; Three-element (non-power-of-two) vector of i32: load it through %vaddr,
  ; extract element 1 with a constant index, store the i32 through %out.
26  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
27  %elt = extractelement <3 x i32> %vec, i32 1
28  store i32 %elt, i32 addrspace(1)* %out
29  ret void
30}
31
32; GCN: 'extractelement_v4i32'
33; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i32>
34define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
  ; Load a <4 x i32> through %vaddr, extract element 1 using a constant
  ; index, and store the resulting i32 through %out.
35  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
36  %elt = extractelement <4 x i32> %vec, i32 1
37  store i32 %elt, i32 addrspace(1)* %out
38  ret void
39}
40
41; GCN: 'extractelement_v8i32'
42; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i32>
43define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) {
  ; Load an <8 x i32> through %vaddr, extract element 1 using a constant
  ; index, and store the resulting i32 through %out.
44  %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
45  %elt = extractelement <8 x i32> %vec, i32 1
46  store i32 %elt, i32 addrspace(1)* %out
47  ret void
48}
49
50; Dynamic (non-constant) index: the cost is expected to be non-zero.
51; GCN: 'extractelement_v8i32_dynindex'
52; GCN: estimated cost of 2 for {{.*}} extractelement <8 x i32>
53define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
  ; Same <8 x i32> load/extract/store shape as the constant-index case, but
  ; the element index is the kernel argument %idx rather than a literal.
54  %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
  ; Index operand is %idx, so it is not a constant in the IR.
55  %elt = extractelement <8 x i32> %vec, i32 %idx
56  store i32 %elt, i32 addrspace(1)* %out
57  ret void
58}
59
60; GCN: 'extractelement_v2i64'
61; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i64>
62define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
  ; 64-bit element case: load a <2 x i64> through %vaddr, extract element 1
  ; (constant index, written as an i64), store the i64 through %out.
63  %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
64  %elt = extractelement <2 x i64> %vec, i64 1
65  store i64 %elt, i64 addrspace(1)* %out
66  ret void
67}
68
69; GCN: 'extractelement_v3i64'
70; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i64>
71define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
  ; Three-element vector of i64: load it through %vaddr, extract element 1
  ; (constant index, written as an i64), store the i64 through %out.
72  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
73  %elt = extractelement <3 x i64> %vec, i64 1
74  store i64 %elt, i64 addrspace(1)* %out
75  ret void
76}
77
78; GCN: 'extractelement_v4i64'
79; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i64>
80define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
  ; Load a <4 x i64> through %vaddr, extract element 1 (constant index,
  ; written as an i64), and store the resulting i64 through %out.
81  %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
82  %elt = extractelement <4 x i64> %vec, i64 1
83  store i64 %elt, i64 addrspace(1)* %out
84  ret void
85}
86
87; GCN: 'extractelement_v8i64'
88; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i64>
89define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
  ; Load an <8 x i64> through %vaddr, extract element 1 (constant index,
  ; written as an i64), and store the resulting i64 through %out.
90  %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
91  %elt = extractelement <8 x i64> %vec, i64 1
92  store i64 %elt, i64 addrspace(1)* %out
93  ret void
94}
95
96; GCN: 'extractelement_v4i8'
97; GCN: estimated cost of 1 for {{.*}} extractelement <4 x i8>
98define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
  ; 8-bit element case: load a <4 x i8> through %vaddr, extract element 1,
  ; and store the resulting i8 through %out.
99  %vec = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr
  ; The constant index here is typed i8, unlike the i32/i64 indices used by
  ; the other functions in this file.
100  %elt = extractelement <4 x i8> %vec, i8 1
101  store i8 %elt, i8 addrspace(1)* %out
102  ret void
103}
104
105; GCN: 'extractelement_0_v2i16':
106; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> %vec, i16 0
107; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16>
108; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16>
109define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  ; 16-bit element case, element 0: load a <2 x i16> through %vaddr, extract
  ; element 0 (constant i16 index), and store the i16 through %out.
  ; The check lines preceding this function expect a different cost on the
  ; default-target run than on the fiji and gfx900 runs.
110  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  ; One of the check lines matches this instruction's operands literally
  ; ("%vec, i16 0"), so the value name and index must stay as written.
111  %elt = extractelement <2 x i16> %vec, i16 0
112  store i16 %elt, i16 addrspace(1)* %out
113  ret void
114}
115
116; GCN: 'extractelement_1_v2i16':
117; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
118define amdgpu_kernel void @extractelement_1_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  ; 16-bit element case, element 1: load a <2 x i16> through %vaddr, extract
  ; element 1 (constant i16 index), and store the i16 through %out.
119  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
120  %elt = extractelement <2 x i16> %vec, i16 1
121  store i16 %elt, i16 addrspace(1)* %out
122  ret void
123}
124
125; GCN: 'extractelement_var_v2i16'
126; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
127define amdgpu_kernel void @extractelement_var_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, i32 %idx) {
  ; 16-bit element case with a variable index: load a <2 x i16> through
  ; %vaddr, extract the element selected by the kernel argument %idx, and
  ; store the i16 through %out.
128  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  ; Index operand is %idx (i32), so it is not a constant in the IR.
129  %elt = extractelement <2 x i16> %vec, i32 %idx
130  store i16 %elt, i16 addrspace(1)* %out
131  ret void
132}
133