1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Regression test: replacing i64 stores with v2i32 stores used to break
; other users of the bitcast when such users already existed.
6
; %val is consumed twice: through a bitcast to <2 x i32> whose two elements
; are extracted and stored individually, and directly by an i64 store.
; The 32-bit store checks require a single-VGPR data operand so that they
; cannot be satisfied by the wider dwordx2 store that follows.
; GCN-LABEL: {{^}}extract_vector_elt_select_error:
; GCN: buffer_store_dword v{{[0-9]+}},
; GCN: buffer_store_dword v{{[0-9]+}},
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) #0 {
  %vec = bitcast i64 %val to <2 x i32>
  %elt0 = extractelement <2 x i32> %vec, i32 0
  %elt1 = extractelement <2 x i32> %vec, i32 1

  ; Volatile stores keep all three memory operations in the output.
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i64 %val, i64 addrspace(1)* %in
  ret void
}
21
; Constant-index extracts from a <2 x i64> kernel argument. Element 1 is
; stored to %out and element 0 to the following i64 slot; each volatile
; store is expected to appear as its own 64-bit store.
; GCN-LABEL: {{^}}extract_vector_elt_v2i64:
; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) #0 {
  %p0 = extractelement <2 x i64> %foo, i32 0
  %p1 = extractelement <2 x i64> %foo, i32 1
  %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1
  store volatile i64 %p1, i64 addrspace(1)* %out
  store volatile i64 %p0, i64 addrspace(1)* %out1
  ret void
}
31
; Variable-index extract from a <2 x i64> kernel argument. The checks expect
; no buffer load; instead the index is compared against 1 and that condition
; feeds two v_cndmask_b32 selects ahead of the 64-bit store.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64:
; GCN-NOT: buffer_load
; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <2 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
43
; Variable-index extract where the <2 x i64> source is loaded from memory and
; OR'd with a kernel argument first. The checks expect one dwordx4 load, no
; further buffer load before the compare/select sequence, and then the same
; compare-against-1 plus two v_cndmask_b32 selects as the argument-only case.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64_2:
; GCN:     buffer_load_dwordx4
; GCN-NOT: buffer_load
; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) #0 {
  %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo
  %or = or <2 x i64> %load, %arst
  %dynelt = extractelement <2 x i64> %or, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
58
; Variable-index extract from a <3 x i64> kernel argument. The checks expect
; no buffer load, index compares against 1 and 2, and two v_cndmask_b32
; selects per compare ahead of the 64-bit store.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64:
; GCN-NOT: buffer_load
; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cmp_eq_u32_e64 [[C2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <3 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
73
; Variable-index extract from a <4 x i64> kernel argument. The checks expect
; no buffer load, index compares against 1, 2 and 3, and two v_cndmask_b32
; selects per compare ahead of the 64-bit store.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64:
; GCN-NOT: buffer_load
; GCN-DAG: v_cmp_eq_u32_e64 [[C1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cmp_eq_u32_e64 [[C2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cmp_eq_u32_e64 [[C3:[^,]+]], [[IDX]], 3
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <4 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
91
; Attribute group referenced as #0 by the kernels above.
attributes #0 = { nounwind }
93