1; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
2
3@sPrivateStorage = external addrspace(3) global [256 x [8 x <4 x i64>]] ; Externally defined addrspace(3) array of 256 x 8 <4 x i64> vectors; every addrspace(3) access in @ds_reorder_vector_split is addressed from this global.
4
5; GCN-LABEL: {{^}}ds_reorder_vector_split:
6
7; Write zeroinitializer
8; GCN-DAG: ds_write_b64 [[PTR:v[0-9]+]], [[VAL:v\[[0-9]+:[0-9]+\]]] offset:24
9; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:16
10; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:8
11; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]]{{$}}
12
13; GCN: s_waitcnt vmcnt
14
15; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
16; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
17; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
18
19; GCN: s_waitcnt lgkmcnt
20
21; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:8
22; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16
23; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24
24
25; Appears to be dead store of vector component.
26; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
27
28; GCN: buffer_store_dwordx2
29; GCN: buffer_store_dwordx2
30; GCN: buffer_store_dwordx2
31; GCN: buffer_store_dwordx2
32; GCN: s_endpgm
33define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 { ; Test input: zero-stores one <4 x i64> slot of @sPrivateStorage, stores a vector loaded from %srcValues into it at a data-dependent offset, then writes four i64 values to %destBuffer (one taken directly from the loaded vector, three re-loaded from addrspace(3)).
34entry:
35  %tmp = tail call i32 @llvm.r600.read.local.size.y() ; NOTE(review): presumably the workgroup size in y - semantics of the r600 read intrinsics are not shown in this file
36  %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
37  %tmp2 = tail call i32 @llvm.r600.read.tidig.x() ; NOTE(review): presumably the work-item id within the group - confirm
38  %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
39  %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
40  %tmp6 = mul i32 %tmp2, %tmp ; lines below compute %tmp9 = ((tidig.x * size.y) + tidig.y) * size.z + tidig.z
41  %tmp10 = add i32 %tmp3, %tmp6
42  %tmp11 = mul i32 %tmp10, %tmp1
43  %tmp9 = add i32 %tmp11, %tmp4 ; %tmp9 is the first-dimension (of 256) index used for every @sPrivateStorage access
44  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
45  %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
46  %mul.26.i = mul i32 %x.i.12.i, %x.i.i
47  %add.i = add i32 %tmp2, %mul.26.i ; %add.i = tidig.x + local.size.x * tgid.x; indexes %srcValues, %offsets and the second dimension of the zero store
48  %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i ; address of vector slot [%tmp9][%add.i]
49  store <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* %arrayidx ; the "Write zeroinitializer" check group expects four ds_write_b64 for this store
50  %tmp12 = sext i32 %add.i to i64
51  %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %srcValues, i64 %tmp12
52  %tmp13 = load <4 x i64>, <4 x i64> addrspace(1)* %arrayidx1 ; vector loaded from addrspace(1) at %srcValues[%add.i]
53  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %tmp12
54  %tmp14 = load i32, i32 addrspace(1)* %arrayidx2 ; per-item offset loaded from %offsets[%add.i]
55  %add.ptr = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 0, i32 %alignmentOffset ; i64 pointer to element %alignmentOffset of vector [%tmp9][0]
56  %mul.i = shl i32 %tmp14, 2 ; %tmp14 * 4, i.e. the offset scaled by the four i64 elements of one vector
57  %arrayidx.i = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr, i32 %mul.i
58  %tmp15 = bitcast i64 addrspace(3)* %arrayidx.i to <4 x i64> addrspace(3)*
59  store <4 x i64> %tmp13, <4 x i64> addrspace(3)* %tmp15 ; whole-vector store through an i64-element pointer reinterpreted as a vector pointer
60  %add.ptr6 = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %tmp14, i32 %alignmentOffset ; i64 pointer to element %alignmentOffset of vector [%tmp9][%tmp14]; base for the three loads below
61  %tmp16 = sext i32 %tmp14 to i64
62  %tmp17 = sext i32 %alignmentOffset to i64
63  %add.ptr9 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %destBuffer, i64 %tmp16, i64 %tmp17 ; i64 pointer to element %tmp17 of %destBuffer[%tmp16]
64  %tmp18 = bitcast <4 x i64> %tmp13 to i256
65  %trunc = trunc i256 %tmp18 to i64 ; low 64 bits of the loaded vector, taken from the register value rather than re-read from addrspace(3)
66  store i64 %trunc, i64 addrspace(1)* %add.ptr9
67  %arrayidx10.1 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 1
68  %tmp19 = load i64, i64 addrspace(3)* %arrayidx10.1 ; addrspace(3) load at %add.ptr6 + 1 i64 (8 bytes)
69  %arrayidx11.1 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 1
70  store i64 %tmp19, i64 addrspace(1)* %arrayidx11.1
71  %arrayidx10.2 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 2
72  %tmp20 = load i64, i64 addrspace(3)* %arrayidx10.2 ; addrspace(3) load at %add.ptr6 + 2 i64 (16 bytes)
73  %arrayidx11.2 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 2
74  store i64 %tmp20, i64 addrspace(1)* %arrayidx11.2
75  %arrayidx10.3 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 3
76  %tmp21 = load i64, i64 addrspace(3)* %arrayidx10.3 ; addrspace(3) load at %add.ptr6 + 3 i64 (24 bytes)
77  %arrayidx11.3 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 3
78  store i64 %tmp21, i64 addrspace(1)* %arrayidx11.3
79  ret void
80}
81
82; Function Attrs: nounwind readnone
83declare i32 @llvm.r600.read.tgid.x() #1 ; called once in @ds_reorder_vector_split (result %x.i.i)
84
85; Function Attrs: nounwind readnone
86declare i32 @llvm.r600.read.local.size.x() #1 ; called once in @ds_reorder_vector_split (result %x.i.12.i)
87
88; Function Attrs: nounwind readnone
89declare i32 @llvm.r600.read.tidig.x() #1 ; result %tmp2 feeds both %tmp9 and %add.i
90
91; Function Attrs: nounwind readnone
92declare i32 @llvm.r600.read.local.size.y() #1 ; result %tmp is a multiplier in the %tmp9 computation
93
94; Function Attrs: nounwind readnone
95declare i32 @llvm.r600.read.local.size.z() #1 ; result %tmp1 is a multiplier in the %tmp9 computation
96
97; Function Attrs: nounwind readnone
98declare i32 @llvm.r600.read.tidig.y() #1 ; result %tmp3 is an addend in the %tmp9 computation
99
100; Function Attrs: nounwind readnone
101declare i32 @llvm.r600.read.tidig.z() #1 ; result %tmp4 is an addend in the %tmp9 computation
102
103attributes #0 = { norecurse nounwind } ; attribute group applied to @ds_reorder_vector_split
104attributes #1 = { nounwind readnone } ; attribute group shared by all llvm.r600.read.* declarations above
105