; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s

; Load <2 x i32>, reinterpret the 64 bits as i64, and keep only the low 32
; bits. The patterns below expect a single-dword load and a dword store.
; CHECK-LABEL: {{^}}trunc_i64_bitcast_v2i32:
; CHECK: buffer_load_dword v
; CHECK: buffer_store_dword v
define void @trunc_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %bc = bitcast <2 x i32> %ld to i64
  %trunc = trunc i64 %bc to i32
  store i32 %trunc, i32 addrspace(1)* %out
  ret void
}

; Load <3 x i32>, reinterpret the 96 bits as i96, and keep only the low 32
; bits. The patterns below expect a single-dword load and a dword store.
; CHECK-LABEL: {{^}}trunc_i96_bitcast_v3i32:
; CHECK: buffer_load_dword v
; CHECK: buffer_store_dword v
define void @trunc_i96_bitcast_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %in) {
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  %bc = bitcast <3 x i32> %ld to i96
  %trunc = trunc i96 %bc to i32
  store i32 %trunc, i32 addrspace(1)* %out
  ret void
}

; Load <4 x i32>, reinterpret the 128 bits as i128, and keep only the low 32
; bits. The patterns below expect a single-dword load and a dword store.
; CHECK-LABEL: {{^}}trunc_i128_bitcast_v4i32:
; CHECK: buffer_load_dword v
; CHECK: buffer_store_dword v
define void @trunc_i128_bitcast_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %bc = bitcast <4 x i32> %ld to i128
  %trunc = trunc i128 %bc to i32
  store i32 %trunc, i32 addrspace(1)* %out
  ret void
}

; Don't want load width reduced in this case.
; Load <2 x i16>, reinterpret the 32 bits as i32, and keep only the low 16
; bits. The patterns below expect the load to stay a dword load and the loaded
; register to be written directly with a 16-bit store.
; CHECK-LABEL: {{^}}trunc_i16_bitcast_v2i16:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: buffer_store_short [[VAL]]
define void @trunc_i16_bitcast_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %bc = bitcast <2 x i16> %ld to i32
  %trunc = trunc i32 %bc to i16
  store i16 %trunc, i16 addrspace(1)* %out
  ret void
}

; Load <4 x i16>, reinterpret the 64 bits as i64, and keep only the low 16
; bits. The patterns below expect a dword load whose result register is
; written directly with a 16-bit store.
; CHECK-LABEL: {{^}}trunc_i16_bitcast_v4i16:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: buffer_store_short [[VAL]]
define void @trunc_i16_bitcast_v4i16(i16 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %bc = bitcast <4 x i16> %ld to i64
  %trunc = trunc i64 %bc to i16
  store i16 %trunc, i16 addrspace(1)* %out
  ret void
}

; FIXME: Don't want load width reduced in this case.
; Load <2 x i8>, reinterpret the 16 bits as i16, and keep only the low 8 bits.
; The patterns below record the current output, in which the load is narrowed
; to a single unsigned byte load feeding a byte store.
; CHECK-LABEL: {{^}}trunc_i8_bitcast_v2i8:
; CHECK: buffer_load_ubyte [[VAL:v[0-9]+]]
; CHECK: buffer_store_byte [[VAL]]
define void @trunc_i8_bitcast_v2i8(i8 addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %bc = bitcast <2 x i8> %ld to i16
  %trunc = trunc i16 %bc to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; Load <4 x i8>, reinterpret the 32 bits as i32, and keep only the low 8 bits.
; The patterns below expect a dword load whose result register is written
; directly with a byte store.
; CHECK-LABEL: {{^}}trunc_i32_bitcast_v4i8:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: buffer_store_byte [[VAL]]
define void @trunc_i32_bitcast_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
  %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %bc = bitcast <4 x i8> %ld to i32
  %trunc = trunc i32 %bc to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}

; Load <3 x i8>, reinterpret the 24 bits as i24, and keep only the low 8 bits.
; The patterns below expect a dword load whose result register is written
; directly with a byte store.
; CHECK-LABEL: {{^}}trunc_i24_bitcast_v3i8:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: buffer_store_byte [[VAL]]
define void @trunc_i24_bitcast_v3i8(i8 addrspace(1)* %out, <3 x i8> addrspace(1)* %in) {
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %bc = bitcast <3 x i8> %ld to i24
  %trunc = trunc i24 %bc to i8
  store i8 %trunc, i8 addrspace(1)* %out
  ret void
}
