1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
6
; Scalar f32 case: passes %x to llvm.roundeven.f32 and returns the result.
; Every checked target is expected to emit a single v_rndne_f32_e32 on v0
; between the entry s_waitcnt and the s_setpc_b64 return.
7define float @v_roundeven_f32(float %x) {
8; GFX6-LABEL: v_roundeven_f32:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
12; GFX6-NEXT:    s_setpc_b64 s[30:31]
13;
14; GFX7-LABEL: v_roundeven_f32:
15; GFX7:       ; %bb.0:
16; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
18; GFX7-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX8-LABEL: v_roundeven_f32:
21; GFX8:       ; %bb.0:
22; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
24; GFX8-NEXT:    s_setpc_b64 s[30:31]
25;
26; GFX9-LABEL: v_roundeven_f32:
27; GFX9:       ; %bb.0:
28; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
30; GFX9-NEXT:    s_setpc_b64 s[30:31]
31  %roundeven = call float @llvm.roundeven.f32(float %x)
32  ret float %roundeven
33}
34
; <2 x float> case: passes %x to llvm.roundeven.v2f32 and returns the result.
; Every checked target is expected to emit two independent v_rndne_f32_e32
; instructions, one on v0 and one on v1, with no extra packing code.
35define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
36; GFX6-LABEL: v_roundeven_v2f32:
37; GFX6:       ; %bb.0:
38; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
40; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
41; GFX6-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX7-LABEL: v_roundeven_v2f32:
44; GFX7:       ; %bb.0:
45; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
47; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
48; GFX7-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX8-LABEL: v_roundeven_v2f32:
51; GFX8:       ; %bb.0:
52; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
54; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
55; GFX8-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX9-LABEL: v_roundeven_v2f32:
58; GFX9:       ; %bb.0:
59; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
61; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
62; GFX9-NEXT:    s_setpc_b64 s[30:31]
63  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
64  ret <2 x float> %roundeven
65}
66
; <3 x float> case (odd element count): passes %x to llvm.roundeven.v3f32
; and returns the result. Every checked target is expected to emit exactly
; three v_rndne_f32_e32 instructions, on v0, v1 and v2.
67define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
68; GFX6-LABEL: v_roundeven_v3f32:
69; GFX6:       ; %bb.0:
70; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
72; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
73; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
74; GFX6-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX7-LABEL: v_roundeven_v3f32:
77; GFX7:       ; %bb.0:
78; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
80; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
81; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
82; GFX7-NEXT:    s_setpc_b64 s[30:31]
83;
84; GFX8-LABEL: v_roundeven_v3f32:
85; GFX8:       ; %bb.0:
86; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
88; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
89; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
90; GFX8-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX9-LABEL: v_roundeven_v3f32:
93; GFX9:       ; %bb.0:
94; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
96; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
97; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
98; GFX9-NEXT:    s_setpc_b64 s[30:31]
99  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
100  ret <3 x float> %roundeven
101}
102
; <4 x float> case: passes %x to llvm.roundeven.v4f32 and returns the result.
; Every checked target is expected to emit four v_rndne_f32_e32 instructions,
; on v0 through v3.
103define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
104; GFX6-LABEL: v_roundeven_v4f32:
105; GFX6:       ; %bb.0:
106; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
108; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
109; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
110; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
111; GFX6-NEXT:    s_setpc_b64 s[30:31]
112;
113; GFX7-LABEL: v_roundeven_v4f32:
114; GFX7:       ; %bb.0:
115; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
117; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
118; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
119; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
120; GFX7-NEXT:    s_setpc_b64 s[30:31]
121;
122; GFX8-LABEL: v_roundeven_v4f32:
123; GFX8:       ; %bb.0:
124; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
126; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
127; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
128; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
129; GFX8-NEXT:    s_setpc_b64 s[30:31]
130;
131; GFX9-LABEL: v_roundeven_v4f32:
132; GFX9:       ; %bb.0:
133; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
135; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
136; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
137; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
138; GFX9-NEXT:    s_setpc_b64 s[30:31]
139  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
140  ret <4 x float> %roundeven
141}
142
; Scalar f16 case: passes %x to llvm.roundeven.f16 and returns the result.
; The GFX6 and GFX7 checks expect the value to be widened with
; v_cvt_f32_f16, rounded with v_rndne_f32, and narrowed with v_cvt_f16_f32.
; The GFX8 and GFX9 checks expect a single v_rndne_f16_e32 instead.
143define half @v_roundeven_f16(half %x) {
144; GFX6-LABEL: v_roundeven_f16:
145; GFX6:       ; %bb.0:
146; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
148; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
149; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
150; GFX6-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX7-LABEL: v_roundeven_f16:
153; GFX7:       ; %bb.0:
154; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
156; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
157; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
158; GFX7-NEXT:    s_setpc_b64 s[30:31]
159;
160; GFX8-LABEL: v_roundeven_f16:
161; GFX8:       ; %bb.0:
162; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
164; GFX8-NEXT:    s_setpc_b64 s[30:31]
165;
166; GFX9-LABEL: v_roundeven_f16:
167; GFX9:       ; %bb.0:
168; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
170; GFX9-NEXT:    s_setpc_b64 s[30:31]
171  %roundeven = call half @llvm.roundeven.f16(half %x)
172  ret half %roundeven
173}
174
; <2 x half> case: passes %x to llvm.roundeven.v2f16 and returns the result.
; The GFX6 and GFX7 checks expect v0 and v1 to each go through the
; f16 -> f32 -> v_rndne_f32 -> f16 sequence.
; The GFX8 and GFX9 checks expect both halves to come from v0: the low half
; is rounded with v_rndne_f16_e32 and the high half with an SDWA v_rndne_f16
; reading src0_sel:WORD_1. The repacking differs: the GFX8 checks shift the
; high result left by 16 and OR it with the low word, while the GFX9 checks
; write the high result straight to WORD_1 and merge with v_and_or_b32 using
; a 0xffff mask.
175define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
176; GFX6-LABEL: v_roundeven_v2f16:
177; GFX6:       ; %bb.0:
178; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
180; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
181; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
182; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
183; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
184; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
185; GFX6-NEXT:    s_setpc_b64 s[30:31]
186;
187; GFX7-LABEL: v_roundeven_v2f16:
188; GFX7:       ; %bb.0:
189; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
191; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
192; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
193; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
194; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
195; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
196; GFX7-NEXT:    s_setpc_b64 s[30:31]
197;
198; GFX8-LABEL: v_roundeven_v2f16:
199; GFX8:       ; %bb.0:
200; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
202; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
203; GFX8-NEXT:    v_mov_b32_e32 v2, 16
204; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
205; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
206; GFX8-NEXT:    s_setpc_b64 s[30:31]
207;
208; GFX9-LABEL: v_roundeven_v2f16:
209; GFX9:       ; %bb.0:
210; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
212; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
213; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
214; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
215; GFX9-NEXT:    s_setpc_b64 s[30:31]
216  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
217  ret <2 x half> %roundeven
218}
219
; <2 x half> with a negated operand: applies fneg to %x, passes the result
; to llvm.roundeven.v2f16, and returns it.
; In all four expected outputs the negation shows up as an explicit
; v_xor_b32 with 0x80008000 rather than as a source modifier on the rounding
; instruction. The GFX6 and GFX7 checks first pack v1 and v0 into one
; register (shift, mask, OR), apply the xor, then unpack and run the
; f16 -> f32 -> v_rndne_f32 -> f16 sequence per half. The GFX8 and GFX9
; checks apply the xor to v0 and then follow the same v_rndne_f16 / SDWA
; sequence as the plain v2f16 test.
220define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
221; GFX6-LABEL: v_roundeven_v2f16_fneg:
222; GFX6:       ; %bb.0:
223; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
225; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
226; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
227; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
228; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
229; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
230; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
231; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
232; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
233; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
234; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
235; GFX6-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX7-LABEL: v_roundeven_v2f16_fneg:
238; GFX7:       ; %bb.0:
239; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
241; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
242; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
243; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
244; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
245; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
246; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
247; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
248; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
249; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
250; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
251; GFX7-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX8-LABEL: v_roundeven_v2f16_fneg:
254; GFX8:       ; %bb.0:
255; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
257; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
258; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
259; GFX8-NEXT:    v_mov_b32_e32 v2, 16
260; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
261; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
262; GFX8-NEXT:    s_setpc_b64 s[30:31]
263;
264; GFX9-LABEL: v_roundeven_v2f16_fneg:
265; GFX9:       ; %bb.0:
266; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
268; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
269; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
270; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
271; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
272; GFX9-NEXT:    s_setpc_b64 s[30:31]
273  %x.fneg = fneg <2 x half> %x
274  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
275  ret <2 x half> %roundeven
276}
277
; <4 x half> case: passes %x to llvm.roundeven.v4f16 and returns the result.
; The GFX6 and GFX7 checks expect v0 through v3 to each go through the
; f16 -> f32 -> v_rndne_f32 -> f16 sequence.
; The GFX8 and GFX9 checks expect the work to be done on v0 and v1 only:
; for each register the low half is rounded with v_rndne_f16_e32 and the
; high half with an SDWA v_rndne_f16, and the two results are recombined
; (shift + OR in the GFX8 checks, v_and_or_b32 with a 0xffff mask in the
; GFX9 checks). A single constant register (v4) is shared by both halves.
278define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
279; GFX6-LABEL: v_roundeven_v4f16:
280; GFX6:       ; %bb.0:
281; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
283; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
284; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
285; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
286; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
287; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
288; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
289; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
290; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
291; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
292; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
293; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
294; GFX6-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX7-LABEL: v_roundeven_v4f16:
297; GFX7:       ; %bb.0:
298; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
300; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
301; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
302; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
303; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
304; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
305; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
306; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
307; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
308; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
309; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
310; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
311; GFX7-NEXT:    s_setpc_b64 s[30:31]
312;
313; GFX8-LABEL: v_roundeven_v4f16:
314; GFX8:       ; %bb.0:
315; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
317; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
318; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
319; GFX8-NEXT:    v_mov_b32_e32 v4, 16
320; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
321; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
322; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
323; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
324; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
325; GFX8-NEXT:    s_setpc_b64 s[30:31]
326;
327; GFX9-LABEL: v_roundeven_v4f16:
328; GFX9:       ; %bb.0:
329; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
331; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
332; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
333; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
334; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
335; GFX9-NEXT:    v_and_or_b32 v0, v2, v4, v0
336; GFX9-NEXT:    v_and_or_b32 v1, v3, v4, v1
337; GFX9-NEXT:    s_setpc_b64 s[30:31]
338  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
339  ret <4 x half> %roundeven
340}
341
342
; f32 with an absolute-value operand: passes llvm.fabs.f32(%x) to
; llvm.roundeven.f32 and returns the result.
; Every checked target is expected to fold the fabs into the rounding
; instruction as a source modifier, giving one v_rndne_f32_e64 with |v0|
; and no separate instruction for the absolute value.
343define float @v_roundeven_f32_fabs(float %x) {
344; GFX6-LABEL: v_roundeven_f32_fabs:
345; GFX6:       ; %bb.0:
346; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
348; GFX6-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX7-LABEL: v_roundeven_f32_fabs:
351; GFX7:       ; %bb.0:
352; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
354; GFX7-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX8-LABEL: v_roundeven_f32_fabs:
357; GFX8:       ; %bb.0:
358; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
360; GFX8-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX9-LABEL: v_roundeven_f32_fabs:
363; GFX9:       ; %bb.0:
364; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
366; GFX9-NEXT:    s_setpc_b64 s[30:31]
367  %fabs.x = call float @llvm.fabs.f32(float %x)
368  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
369  ret float %roundeven
370}
371
; f32 case with an inreg argument in an amdgpu_ps function: passes %x to
; llvm.roundeven.f32 and returns the result.
; Every checked target is expected to read the operand from s0 and write
; the result to v0 with one v_rndne_f32_e32. Unlike the v_* tests in this
; file, the expected output has no s_waitcnt / s_setpc_b64 pair and ends
; with the "return to shader part epilog" marker.
372define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
373; GFX6-LABEL: s_roundeven_f32:
374; GFX6:       ; %bb.0:
375; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
376; GFX6-NEXT:    ; return to shader part epilog
377;
378; GFX7-LABEL: s_roundeven_f32:
379; GFX7:       ; %bb.0:
380; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
381; GFX7-NEXT:    ; return to shader part epilog
382;
383; GFX8-LABEL: s_roundeven_f32:
384; GFX8:       ; %bb.0:
385; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
386; GFX8-NEXT:    ; return to shader part epilog
387;
388; GFX9-LABEL: s_roundeven_f32:
389; GFX9:       ; %bb.0:
390; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
391; GFX9-NEXT:    ; return to shader part epilog
392  %roundeven = call float @llvm.roundeven.f32(float %x)
393  ret float %roundeven
394}
395
; f32 with a negated operand: applies fneg to %x, passes the result to
; llvm.roundeven.f32, and returns it.
; Every checked target is expected to fold the negation into the rounding
; instruction as a source modifier, giving one v_rndne_f32_e64 with -v0
; and no separate instruction for the sign flip.
396define float @v_roundeven_f32_fneg(float %x) {
397; GFX6-LABEL: v_roundeven_f32_fneg:
398; GFX6:       ; %bb.0:
399; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
401; GFX6-NEXT:    s_setpc_b64 s[30:31]
402;
403; GFX7-LABEL: v_roundeven_f32_fneg:
404; GFX7:       ; %bb.0:
405; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
407; GFX7-NEXT:    s_setpc_b64 s[30:31]
408;
409; GFX8-LABEL: v_roundeven_f32_fneg:
410; GFX8:       ; %bb.0:
411; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
413; GFX8-NEXT:    s_setpc_b64 s[30:31]
414;
415; GFX9-LABEL: v_roundeven_f32_fneg:
416; GFX9:       ; %bb.0:
417; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
419; GFX9-NEXT:    s_setpc_b64 s[30:31]
420  %neg.x = fneg float %x
421  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
422  ret float %roundeven
423}
424
; Scalar f64 case: passes %x to llvm.roundeven.f64 and returns the result.
; The GFX7, GFX8 and GFX9 checks expect a single v_rndne_f64_e32.
; The GFX6 checks expect a multi-instruction expansion instead:
;   - v[2:3] is built as a double with high word 0x43300000 (the encoding of
;     2^52) OR-ed with the sign bit taken from the input's high word v1;
;   - that constant is added to the input and then subtracted again;
;   - v_cmp_gt_f64 compares |x| against s[4:5] = 0x432fffff:0xffffffff, and
;     the v_cndmask pair keeps the original input when the compare is true.
425define double @v_roundeven_f64(double %x) {
426; GFX6-LABEL: v_roundeven_f64:
427; GFX6:       ; %bb.0:
428; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
430; GFX6-NEXT:    v_mov_b32_e32 v2, 0
431; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
432; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
433; GFX6-NEXT:    s_mov_b32 s4, -1
434; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
435; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
436; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
437; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
438; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
439; GFX6-NEXT:    s_setpc_b64 s[30:31]
440;
441; GFX7-LABEL: v_roundeven_f64:
442; GFX7:       ; %bb.0:
443; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
445; GFX7-NEXT:    s_setpc_b64 s[30:31]
446;
447; GFX8-LABEL: v_roundeven_f64:
448; GFX8:       ; %bb.0:
449; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
451; GFX8-NEXT:    s_setpc_b64 s[30:31]
452;
453; GFX9-LABEL: v_roundeven_f64:
454; GFX9:       ; %bb.0:
455; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
457; GFX9-NEXT:    s_setpc_b64 s[30:31]
458  %roundeven = call double @llvm.roundeven.f64(double %x)
459  ret double %roundeven
460}
461
; f64 with a negated operand: applies fneg to %x, passes the result to
; llvm.roundeven.f64, and returns it.
; The GFX7, GFX8 and GFX9 checks expect the negation to be folded as a
; source modifier, giving one v_rndne_f64_e64 with -v[0:1].
; The GFX6 checks expect the same add/subtract expansion as the plain f64
; test, with the negation handled in two ways: an explicit v_xor_b32 with
; 0x80000000 produces the flipped high word in v2 (used for the sign copy,
; the compare and the final select), while the first v_add_f64 uses the
; -v[0:1] source modifier. The low word is copied into v1 so that v[1:2]
; forms the register pair read by the compare.
462define double @v_roundeven_f64_fneg(double %x) {
463; GFX6-LABEL: v_roundeven_f64_fneg:
464; GFX6:       ; %bb.0:
465; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
467; GFX6-NEXT:    v_and_b32_e32 v4, 0x80000000, v2
468; GFX6-NEXT:    v_mov_b32_e32 v3, 0
469; GFX6-NEXT:    v_or_b32_e32 v4, 0x43300000, v4
470; GFX6-NEXT:    v_add_f64 v[5:6], -v[0:1], v[3:4]
471; GFX6-NEXT:    v_mov_b32_e32 v1, v0
472; GFX6-NEXT:    s_mov_b32 s4, -1
473; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
474; GFX6-NEXT:    v_add_f64 v[3:4], v[5:6], -v[3:4]
475; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[1:2]|, s[4:5]
476; GFX6-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
477; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
478; GFX6-NEXT:    s_setpc_b64 s[30:31]
479;
480; GFX7-LABEL: v_roundeven_f64_fneg:
481; GFX7:       ; %bb.0:
482; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
484; GFX7-NEXT:    s_setpc_b64 s[30:31]
485;
486; GFX8-LABEL: v_roundeven_f64_fneg:
487; GFX8:       ; %bb.0:
488; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
490; GFX8-NEXT:    s_setpc_b64 s[30:31]
491;
492; GFX9-LABEL: v_roundeven_f64_fneg:
493; GFX9:       ; %bb.0:
494; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
496; GFX9-NEXT:    s_setpc_b64 s[30:31]
497  %neg.x = fneg double %x
498  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
499  ret double %roundeven
500}
501
; <2 x double> case: passes %x to llvm.roundeven.v2f64 and returns the
; result.
; The GFX7, GFX8 and GFX9 checks expect two v_rndne_f64_e32 instructions,
; one on v[0:1] and one on v[2:3].
; The GFX6 checks expect the add/subtract/compare/select expansion from the
; scalar f64 test to be applied once per register pair. The constants are
; set up once in SGPRs and reused by both halves: s6 (loaded with
; s_brev_b32 s6, 1 and used as the AND mask on each high word; presumably
; the 0x80000000 sign mask), s7 = 0x43300000, and the compare bound in
; s[4:5].
502define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
503; GFX6-LABEL: v_roundeven_v2f64:
504; GFX6:       ; %bb.0:
505; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506; GFX6-NEXT:    s_brev_b32 s6, 1
507; GFX6-NEXT:    s_mov_b32 s7, 0x43300000
508; GFX6-NEXT:    v_and_b32_e32 v5, s6, v1
509; GFX6-NEXT:    v_mov_b32_e32 v4, 0
510; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
511; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
512; GFX6-NEXT:    s_mov_b32 s4, -1
513; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
514; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
515; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
516; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
517; GFX6-NEXT:    v_and_b32_e32 v5, s6, v3
518; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
519; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
520; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
521; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
522; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
523; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
524; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
525; GFX6-NEXT:    s_setpc_b64 s[30:31]
526;
527; GFX7-LABEL: v_roundeven_v2f64:
528; GFX7:       ; %bb.0:
529; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
531; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
532; GFX7-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX8-LABEL: v_roundeven_v2f64:
535; GFX8:       ; %bb.0:
536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
538; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
539; GFX8-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX9-LABEL: v_roundeven_v2f64:
542; GFX9:       ; %bb.0:
543; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
545; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
546; GFX9-NEXT:    s_setpc_b64 s[30:31]
547  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
548  ret <2 x double> %roundeven
549}
550
551declare half @llvm.roundeven.f16(half) #0
552declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
553declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
554
555declare float @llvm.roundeven.f32(float) #0
556declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
557declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
558declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
559
560declare double @llvm.roundeven.f64(double) #0
561declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
562
563declare half @llvm.fabs.f16(half) #0
564declare float @llvm.fabs.f32(float) #0
565
566attributes #0 = { nounwind readnone speculatable willreturn }
567