1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4
; Truncates an i32 argument to i16 and returns it from a function with the
; default calling convention.
; For both check prefixes the expected output is just the entry s_waitcnt
; followed by the s_setpc_b64 return, i.e. no instruction is expected for the
; truncation itself.
5define i16 @v_trunc_i32_to_i16(i32 %src) {
6; GFX7-LABEL: v_trunc_i32_to_i16:
7; GFX7:       ; %bb.0:
8; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GFX7-NEXT:    s_setpc_b64 s[30:31]
10;
11; GFX8-LABEL: v_trunc_i32_to_i16:
12; GFX8:       ; %bb.0:
13; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX8-NEXT:    s_setpc_b64 s[30:31]
15  %trunc = trunc i32 %src to i16
16  ret i16 %trunc
17}
18
; Truncates an inreg i32 argument to i16 and returns it from an amdgpu_ps
; function.
; For both check prefixes the block label is expected to be followed directly
; by the "return to shader part epilog" comment, i.e. no instructions are
; expected for the truncation.
19define amdgpu_ps i16 @s_trunc_i32_to_i16(i32 inreg %src) {
20; GFX7-LABEL: s_trunc_i32_to_i16:
21; GFX7:       ; %bb.0:
22; GFX7-NEXT:    ; return to shader part epilog
23;
24; GFX8-LABEL: s_trunc_i32_to_i16:
25; GFX8:       ; %bb.0:
26; GFX8-NEXT:    ; return to shader part epilog
27  %trunc = trunc i32 %src to i16
28  ret i16 %trunc
29}
30
; Truncates an i64 argument to i16 and returns it from a function with the
; default calling convention.
; For both check prefixes the expected output is just the entry s_waitcnt
; followed by the s_setpc_b64 return, i.e. no instruction is expected for the
; truncation itself.
31define i16 @v_trunc_i64_to_i16(i64 %src) {
32; GFX7-LABEL: v_trunc_i64_to_i16:
33; GFX7:       ; %bb.0:
34; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX7-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX8-LABEL: v_trunc_i64_to_i16:
38; GFX8:       ; %bb.0:
39; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX8-NEXT:    s_setpc_b64 s[30:31]
41  %trunc = trunc i64 %src to i16
42  ret i16 %trunc
43}
44
; Truncates an inreg i64 argument to i16 and returns it from an amdgpu_ps
; function.
; For both check prefixes the block label is expected to be followed directly
; by the "return to shader part epilog" comment, i.e. no instructions are
; expected for the truncation.
45define amdgpu_ps i16 @s_trunc_i64_to_i16(i64 inreg %src) {
46; GFX7-LABEL: s_trunc_i64_to_i16:
47; GFX7:       ; %bb.0:
48; GFX7-NEXT:    ; return to shader part epilog
49;
50; GFX8-LABEL: s_trunc_i64_to_i16:
51; GFX8:       ; %bb.0:
52; GFX8-NEXT:    ; return to shader part epilog
53  %trunc = trunc i64 %src to i16
54  ret i16 %trunc
55}
56
; Truncates an inreg i128 argument to i16 and returns it from an amdgpu_ps
; function.
; For both check prefixes the block label is expected to be followed directly
; by the "return to shader part epilog" comment, i.e. no instructions are
; expected for the truncation.
57define amdgpu_ps i16 @s_trunc_i128_to_i16(i128 inreg %src) {
58; GFX7-LABEL: s_trunc_i128_to_i16:
59; GFX7:       ; %bb.0:
60; GFX7-NEXT:    ; return to shader part epilog
61;
62; GFX8-LABEL: s_trunc_i128_to_i16:
63; GFX8:       ; %bb.0:
64; GFX8-NEXT:    ; return to shader part epilog
65  %trunc = trunc i128 %src to i16
66  ret i16 %trunc
67}
68
; Truncates an i128 argument to i16 and returns it from a function with the
; default calling convention.
; For both check prefixes the expected output is just the entry s_waitcnt
; followed by the s_setpc_b64 return, i.e. no instruction is expected for the
; truncation itself.
69define i16 @v_trunc_i128_to_i16(i128 %src) {
70; GFX7-LABEL: v_trunc_i128_to_i16:
71; GFX7:       ; %bb.0:
72; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX7-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX8-LABEL: v_trunc_i128_to_i16:
76; GFX8:       ; %bb.0:
77; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX8-NEXT:    s_setpc_b64 s[30:31]
79  %trunc = trunc i128 %src to i16
80  ret i16 %trunc
81}
82
; Truncates a <2 x i32> argument to <2 x i16>, bitcasts the result to i32 and
; returns it from a function with the default calling convention.
; Expected code differs per check prefix:
;  - GFX7 checks expect a three-instruction pack: v1 shifted left by 16,
;    v0 masked with 0xffff, then the two OR'd together into v0.
;  - GFX8 checks expect a single SDWA v_mov_b32 that writes WORD_0 of v1 into
;    WORD_1 of v0 while preserving the rest of v0 (UNUSED_PRESERVE).
83define i32 @v_trunc_v2i32_to_v2i16(<2 x i32> %src) {
84; GFX7-LABEL: v_trunc_v2i32_to_v2i16:
85; GFX7:       ; %bb.0:
86; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
88; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
89; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
90; GFX7-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX8-LABEL: v_trunc_v2i32_to_v2i16:
93; GFX8:       ; %bb.0:
94; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX8-NEXT:    v_mov_b32_sdwa v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
96; GFX8-NEXT:    s_setpc_b64 s[30:31]
97  %trunc = trunc <2 x i32> %src to <2 x i16>
98  %cast = bitcast <2 x i16> %trunc to i32
99  ret i32 %cast
100}
101
; Truncates an inreg <2 x i32> argument to <2 x i16>, bitcasts the result to
; i32 and returns it from an amdgpu_ps function.
; Both check prefixes expect the same scalar pack sequence: s1 shifted left by
; 16, s0 masked with 0xffff, then the two OR'd together into s0.
102define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
103; GFX7-LABEL: s_trunc_v2i32_to_v2i16:
104; GFX7:       ; %bb.0:
105; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
106; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
107; GFX7-NEXT:    s_or_b32 s0, s1, s0
108; GFX7-NEXT:    ; return to shader part epilog
109;
110; GFX8-LABEL: s_trunc_v2i32_to_v2i16:
111; GFX8:       ; %bb.0:
112; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
113; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
114; GFX8-NEXT:    s_or_b32 s0, s1, s0
115; GFX8-NEXT:    ; return to shader part epilog
116  %trunc = trunc <2 x i32> %src to <2 x i16>
117  %cast = bitcast <2 x i16> %trunc to i32
118  ret i32 %cast
119}
120
121; ; FIXME: Test disabled because G_INSERT is mishandled; re-enable once that is fixed.
122; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
123;   %trunc = trunc <3 x i32> %src to <3 x i16>
124;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
125;   %cast = bitcast <4 x i16> %ext to <2 x i32>
126;   ret <2 x i32> %cast
127; }
128
129; ; FIXME: Test disabled because G_INSERT is mishandled; re-enable once that is fixed.
130; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
131;   %trunc = trunc <3 x i32> %src to <3 x i16>
132;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
133;   %cast = bitcast <4 x i16> %ext to <2 x i32>
134;   ret <2 x i32> %cast
135; }
136
; Truncates a <4 x i32> argument to <4 x i16>, bitcasts the result to
; <2 x i32> and returns it from a function with the default calling convention.
; Expected code differs per check prefix:
;  - GFX7 checks expect the 0xffff mask to be materialized once in v4, then
;    two shift/and/or packs: (v1, v0) into v0 and (v3, v2) into v1.
;  - GFX8 checks expect two SDWA v_mov_b32 instructions that write WORD_0 of
;    v3 into WORD_1 of v2 and WORD_0 of v1 into WORD_1 of v0, followed by a
;    plain v_mov_b32 copying v2 into v1.
137define <2 x i32> @v_trunc_v4i32_to_v4i16(<4 x i32> %src) {
138; GFX7-LABEL: v_trunc_v4i32_to_v4i16:
139; GFX7:       ; %bb.0:
140; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX7-NEXT:    v_mov_b32_e32 v4, 0xffff
142; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
143; GFX7-NEXT:    v_and_b32_e32 v0, v0, v4
144; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
145; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
146; GFX7-NEXT:    v_and_b32_e32 v2, v2, v4
147; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
148; GFX7-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX8-LABEL: v_trunc_v4i32_to_v4i16:
151; GFX8:       ; %bb.0:
152; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX8-NEXT:    v_mov_b32_sdwa v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
154; GFX8-NEXT:    v_mov_b32_sdwa v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
155; GFX8-NEXT:    v_mov_b32_e32 v1, v2
156; GFX8-NEXT:    s_setpc_b64 s[30:31]
157  %trunc = trunc <4 x i32> %src to <4 x i16>
158  %cast = bitcast <4 x i16> %trunc to <2 x i32>
159  ret <2 x i32> %cast
160}
161
; Truncates an inreg <4 x i32> argument to <4 x i16>, bitcasts the result to
; <2 x i32> and returns it from an amdgpu_ps function.
; Both check prefixes expect the same scalar sequence: the 0xffff mask is
; materialized once in s4, then two shift/and/or packs produce the result,
; (s1, s0) into s0 and (s3, s2) into s1.
162define amdgpu_ps <2 x i32> @s_trunc_v4i32_to_v4i16(<4 x i32> inreg %src) {
163; GFX7-LABEL: s_trunc_v4i32_to_v4i16:
164; GFX7:       ; %bb.0:
165; GFX7-NEXT:    s_mov_b32 s4, 0xffff
166; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
167; GFX7-NEXT:    s_and_b32 s0, s0, s4
168; GFX7-NEXT:    s_or_b32 s0, s1, s0
169; GFX7-NEXT:    s_lshl_b32 s1, s3, 16
170; GFX7-NEXT:    s_and_b32 s2, s2, s4
171; GFX7-NEXT:    s_or_b32 s1, s1, s2
172; GFX7-NEXT:    ; return to shader part epilog
173;
174; GFX8-LABEL: s_trunc_v4i32_to_v4i16:
175; GFX8:       ; %bb.0:
176; GFX8-NEXT:    s_mov_b32 s4, 0xffff
177; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
178; GFX8-NEXT:    s_and_b32 s0, s0, s4
179; GFX8-NEXT:    s_or_b32 s0, s1, s0
180; GFX8-NEXT:    s_lshl_b32 s1, s3, 16
181; GFX8-NEXT:    s_and_b32 s2, s2, s4
182; GFX8-NEXT:    s_or_b32 s1, s1, s2
183; GFX8-NEXT:    ; return to shader part epilog
184  %trunc = trunc <4 x i32> %src to <4 x i16>
185  %cast = bitcast <4 x i16> %trunc to <2 x i32>
186  ret <2 x i32> %cast
187}
188