1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5
; Scalar f32 llvm.pow with both operands in VGPRs. All three check prefixes
; expect the same expansion: v_log_f32 on the first operand, v_mul_legacy_f32
; by the second operand, then v_exp_f32 on the product.
6define float @v_pow_f32(float %x, float %y) {
7; GFX6-LABEL: v_pow_f32:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX6-NEXT:    v_log_f32_e32 v0, v0
11; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
12; GFX6-NEXT:    v_exp_f32_e32 v0, v0
13; GFX6-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX8-LABEL: v_pow_f32:
16; GFX8:       ; %bb.0:
17; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX8-NEXT:    v_log_f32_e32 v0, v0
19; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
20; GFX8-NEXT:    v_exp_f32_e32 v0, v0
21; GFX8-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX9-LABEL: v_pow_f32:
24; GFX9:       ; %bb.0:
25; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX9-NEXT:    v_log_f32_e32 v0, v0
27; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
28; GFX9-NEXT:    v_exp_f32_e32 v0, v0
29; GFX9-NEXT:    s_setpc_b64 s[30:31]
30  %pow = call float @llvm.pow.f32(float %x, float %y)
31  ret float %pow
32}
33
; <2 x float> llvm.pow. The checks expect the vector to be handled one lane at
; a time on every target: two v_log_f32, two v_mul_legacy_f32 and two
; v_exp_f32, with the results left in v0 and v1.
34define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
35; GFX6-LABEL: v_pow_v2f32:
36; GFX6:       ; %bb.0:
37; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; GFX6-NEXT:    v_log_f32_e32 v0, v0
39; GFX6-NEXT:    v_log_f32_e32 v1, v1
40; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
41; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
42; GFX6-NEXT:    v_exp_f32_e32 v0, v0
43; GFX6-NEXT:    v_exp_f32_e32 v1, v1
44; GFX6-NEXT:    s_setpc_b64 s[30:31]
45;
46; GFX8-LABEL: v_pow_v2f32:
47; GFX8:       ; %bb.0:
48; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GFX8-NEXT:    v_log_f32_e32 v0, v0
50; GFX8-NEXT:    v_log_f32_e32 v1, v1
51; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
52; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
53; GFX8-NEXT:    v_exp_f32_e32 v0, v0
54; GFX8-NEXT:    v_exp_f32_e32 v1, v1
55; GFX8-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX9-LABEL: v_pow_v2f32:
58; GFX9:       ; %bb.0:
59; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    v_log_f32_e32 v0, v0
61; GFX9-NEXT:    v_log_f32_e32 v1, v1
62; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
63; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
64; GFX9-NEXT:    v_exp_f32_e32 v0, v0
65; GFX9-NEXT:    v_exp_f32_e32 v1, v1
66; GFX9-NEXT:    s_setpc_b64 s[30:31]
67  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
68  ret <2 x float> %pow
69}
70
; Scalar f16 llvm.pow. The checks expect the math itself to be done in f32.
; The GFX8 and GFX9 sequences extend both operands with v_cvt_f32_f16, run
; log/mul_legacy/exp and convert the result back with v_cvt_f16_f32. The GFX6
; sequence instead begins with a v_cvt_f16_f32 / v_cvt_f32_f16 round trip on
; each operand and has no conversion after v_exp_f32.
71define half @v_pow_f16(half %x, half %y) {
72; GFX6-LABEL: v_pow_f16:
73; GFX6:       ; %bb.0:
74; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
76; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
77; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
78; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
79; GFX6-NEXT:    v_log_f32_e32 v0, v0
80; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
81; GFX6-NEXT:    v_exp_f32_e32 v0, v0
82; GFX6-NEXT:    s_setpc_b64 s[30:31]
83;
84; GFX8-LABEL: v_pow_f16:
85; GFX8:       ; %bb.0:
86; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
88; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
89; GFX8-NEXT:    v_log_f32_e32 v0, v0
90; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
91; GFX8-NEXT:    v_exp_f32_e32 v0, v0
92; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
93; GFX8-NEXT:    s_setpc_b64 s[30:31]
94;
95; GFX9-LABEL: v_pow_f16:
96; GFX9:       ; %bb.0:
97; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
99; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
100; GFX9-NEXT:    v_log_f32_e32 v0, v0
101; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
102; GFX9-NEXT:    v_exp_f32_e32 v0, v0
103; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
104; GFX9-NEXT:    s_setpc_b64 s[30:31]
105  %pow = call half @llvm.pow.f16(half %x, half %y)
106  ret half %pow
107}
108
; <2 x half> llvm.pow. On GFX8 and GFX9 the checks expect the high half of each
; packed operand to be extended with an SDWA conversion (src0_sel:WORD_1), each
; lane to go through log/mul_legacy/exp in f32, and the two f16 results to be
; merged back into v0 (v_or_b32 on GFX8, v_and_b32 + v_lshl_or_b32 on GFX9).
; The GFX6 checks read the lanes from four separate registers (v0-v3) and
; leave the results in v0 and v1.
109define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
110; GFX6-LABEL: v_pow_v2f16:
111; GFX6:       ; %bb.0:
112; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
114; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
115; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
116; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
117; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
118; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
119; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
120; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
121; GFX6-NEXT:    v_log_f32_e32 v0, v0
122; GFX6-NEXT:    v_log_f32_e32 v1, v1
123; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
124; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
125; GFX6-NEXT:    v_exp_f32_e32 v0, v0
126; GFX6-NEXT:    v_exp_f32_e32 v1, v1
127; GFX6-NEXT:    s_setpc_b64 s[30:31]
128;
129; GFX8-LABEL: v_pow_v2f16:
130; GFX8:       ; %bb.0:
131; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
133; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
134; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
135; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
136; GFX8-NEXT:    v_log_f32_e32 v2, v2
137; GFX8-NEXT:    v_log_f32_e32 v0, v0
138; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
139; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
140; GFX8-NEXT:    v_exp_f32_e32 v0, v0
141; GFX8-NEXT:    v_exp_f32_e32 v2, v2
142; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
143; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
144; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
145; GFX8-NEXT:    s_setpc_b64 s[30:31]
146;
147; GFX9-LABEL: v_pow_v2f16:
148; GFX9:       ; %bb.0:
149; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
151; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
152; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
153; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
154; GFX9-NEXT:    v_log_f32_e32 v2, v2
155; GFX9-NEXT:    v_log_f32_e32 v0, v0
156; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
157; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
158; GFX9-NEXT:    v_exp_f32_e32 v0, v0
159; GFX9-NEXT:    v_exp_f32_e32 v1, v2
160; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
161; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
162; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
163; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
164; GFX9-NEXT:    s_setpc_b64 s[30:31]
165  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
166  ret <2 x half> %pow
167}
168
; <2 x half> llvm.pow with fneg applied to %x only. The GFX8 and GFX9 checks
; expect the negation to show up as a source modifier (-v0) on both f16 -> f32
; conversions of the first operand. The GFX6 checks expect the two lanes of the
; first operand to be packed into one register and negated with a single
; v_xor_b32 against 0x80008000 before being split and extended again.
169define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
170; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
171; GFX6:       ; %bb.0:
172; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
174; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
175; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
176; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
177; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
178; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
179; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
180; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
181; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
182; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
183; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
184; GFX6-NEXT:    v_log_f32_e32 v3, v3
185; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
186; GFX6-NEXT:    v_log_f32_e32 v4, v0
187; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
188; GFX6-NEXT:    v_exp_f32_e32 v0, v0
189; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
190; GFX6-NEXT:    v_exp_f32_e32 v1, v1
191; GFX6-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
194; GFX8:       ; %bb.0:
195; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
197; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
198; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
199; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
200; GFX8-NEXT:    v_log_f32_e32 v2, v2
201; GFX8-NEXT:    v_log_f32_e32 v0, v0
202; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
203; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
204; GFX8-NEXT:    v_exp_f32_e32 v0, v0
205; GFX8-NEXT:    v_exp_f32_e32 v2, v2
206; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
207; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
208; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
209; GFX8-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
212; GFX9:       ; %bb.0:
213; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
215; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
216; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
217; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
218; GFX9-NEXT:    v_log_f32_e32 v2, v2
219; GFX9-NEXT:    v_log_f32_e32 v0, v0
220; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
221; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
222; GFX9-NEXT:    v_exp_f32_e32 v0, v0
223; GFX9-NEXT:    v_exp_f32_e32 v1, v2
224; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
225; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
226; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
227; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
228; GFX9-NEXT:    s_setpc_b64 s[30:31]
229  %x.fneg = fneg <2 x half> %x
230  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
231  ret <2 x half> %pow
232}
233
; <2 x half> llvm.pow with fneg applied to %y only. The GFX8 and GFX9 checks
; expect the negation to show up as a source modifier (-v1) on both f16 -> f32
; conversions of the second operand. The GFX6 checks expect the two lanes of
; the second operand to be packed into v2 and negated with a single v_xor_b32
; against 0x80008000 before being split and extended again.
234define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
235; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
236; GFX6:       ; %bb.0:
237; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
239; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
240; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
241; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
242; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
243; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
244; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
245; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
246; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
247; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
248; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
249; GFX6-NEXT:    v_log_f32_e32 v0, v0
250; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
251; GFX6-NEXT:    v_log_f32_e32 v1, v1
252; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
253; GFX6-NEXT:    v_exp_f32_e32 v0, v0
254; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
255; GFX6-NEXT:    v_exp_f32_e32 v1, v1
256; GFX6-NEXT:    s_setpc_b64 s[30:31]
257;
258; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
259; GFX8:       ; %bb.0:
260; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
262; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
263; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
264; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
265; GFX8-NEXT:    v_log_f32_e32 v2, v2
266; GFX8-NEXT:    v_log_f32_e32 v0, v0
267; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
268; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
269; GFX8-NEXT:    v_exp_f32_e32 v0, v0
270; GFX8-NEXT:    v_exp_f32_e32 v2, v2
271; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
272; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
273; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
274; GFX8-NEXT:    s_setpc_b64 s[30:31]
275;
276; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
277; GFX9:       ; %bb.0:
278; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
280; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
281; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
282; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
283; GFX9-NEXT:    v_log_f32_e32 v2, v2
284; GFX9-NEXT:    v_log_f32_e32 v0, v0
285; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
286; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
287; GFX9-NEXT:    v_exp_f32_e32 v0, v0
288; GFX9-NEXT:    v_exp_f32_e32 v1, v2
289; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
290; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
291; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
292; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
293; GFX9-NEXT:    s_setpc_b64 s[30:31]
294  %y.fneg = fneg <2 x half> %y
295  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
296  ret <2 x half> %pow
297}
298
; <2 x half> llvm.pow with fneg applied to both %x and %y. The GFX8 and GFX9
; checks expect source modifiers (-v0 and -v1) on all four f16 -> f32
; conversions. The GFX6 checks expect the 0x80008000 sign mask to be moved into
; s4 once and then used by two v_xor_b32 instructions, one per packed operand.
299define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
300; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
301; GFX6:       ; %bb.0:
302; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
304; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
305; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
306; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
307; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
308; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
309; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
310; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
311; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
312; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
313; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
314; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
315; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
316; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
317; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
318; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
319; GFX6-NEXT:    v_log_f32_e32 v0, v0
320; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
321; GFX6-NEXT:    v_log_f32_e32 v1, v1
322; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
323; GFX6-NEXT:    v_exp_f32_e32 v0, v0
324; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
325; GFX6-NEXT:    v_exp_f32_e32 v1, v1
326; GFX6-NEXT:    s_setpc_b64 s[30:31]
327;
328; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
329; GFX8:       ; %bb.0:
330; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
332; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
333; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
334; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
335; GFX8-NEXT:    v_log_f32_e32 v2, v2
336; GFX8-NEXT:    v_log_f32_e32 v0, v0
337; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
338; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
339; GFX8-NEXT:    v_exp_f32_e32 v0, v0
340; GFX8-NEXT:    v_exp_f32_e32 v2, v2
341; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
342; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
343; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
344; GFX8-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
347; GFX9:       ; %bb.0:
348; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
350; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
351; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
352; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
353; GFX9-NEXT:    v_log_f32_e32 v2, v2
354; GFX9-NEXT:    v_log_f32_e32 v0, v0
355; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
356; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
357; GFX9-NEXT:    v_exp_f32_e32 v0, v0
358; GFX9-NEXT:    v_exp_f32_e32 v1, v2
359; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
360; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
361; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
362; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
363; GFX9-NEXT:    s_setpc_b64 s[30:31]
364  %x.fneg = fneg <2 x half> %x
365  %y.fneg = fneg <2 x half> %y
366  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
367  ret <2 x half> %pow
368}
369
370; FIXME: The f64 variant below is commented out, so llvm.pow.f64 is not tested here.
371; define double @v_pow_f64(double %x, double %y) {
372;   %pow = call double @llvm.pow.f64(double %x, double %y)
373;   ret double %pow
374; }
375
; f32 llvm.pow with llvm.fabs applied to %x only. On every target the checks
; expect the absolute value to be a separate v_and_b32 of v0 with 0x7fffffff
; ahead of v_log_f32, followed by the usual mul_legacy/exp sequence.
376define float @v_pow_f32_fabs_lhs(float %x, float %y) {
377; GFX6-LABEL: v_pow_f32_fabs_lhs:
378; GFX6:       ; %bb.0:
379; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
381; GFX6-NEXT:    v_log_f32_e32 v0, v0
382; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
383; GFX6-NEXT:    v_exp_f32_e32 v0, v0
384; GFX6-NEXT:    s_setpc_b64 s[30:31]
385;
386; GFX8-LABEL: v_pow_f32_fabs_lhs:
387; GFX8:       ; %bb.0:
388; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389; GFX8-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
390; GFX8-NEXT:    v_log_f32_e32 v0, v0
391; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
392; GFX8-NEXT:    v_exp_f32_e32 v0, v0
393; GFX8-NEXT:    s_setpc_b64 s[30:31]
394;
395; GFX9-LABEL: v_pow_f32_fabs_lhs:
396; GFX9:       ; %bb.0:
397; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
399; GFX9-NEXT:    v_log_f32_e32 v0, v0
400; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
401; GFX9-NEXT:    v_exp_f32_e32 v0, v0
402; GFX9-NEXT:    s_setpc_b64 s[30:31]
403  %fabs.x = call float @llvm.fabs.f32(float %x)
404  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
405  ret float %pow
406}
407
; f32 llvm.pow with llvm.fabs applied to %y only. On every target the checks
; expect the absolute value to be a separate v_and_b32 of v1 with 0x7fffffff,
; placed between v_log_f32 and v_mul_legacy_f32.
408define float @v_pow_f32_fabs_rhs(float %x, float %y) {
409; GFX6-LABEL: v_pow_f32_fabs_rhs:
410; GFX6:       ; %bb.0:
411; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX6-NEXT:    v_log_f32_e32 v0, v0
413; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
414; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
415; GFX6-NEXT:    v_exp_f32_e32 v0, v0
416; GFX6-NEXT:    s_setpc_b64 s[30:31]
417;
418; GFX8-LABEL: v_pow_f32_fabs_rhs:
419; GFX8:       ; %bb.0:
420; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421; GFX8-NEXT:    v_log_f32_e32 v0, v0
422; GFX8-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
423; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
424; GFX8-NEXT:    v_exp_f32_e32 v0, v0
425; GFX8-NEXT:    s_setpc_b64 s[30:31]
426;
427; GFX9-LABEL: v_pow_f32_fabs_rhs:
428; GFX9:       ; %bb.0:
429; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430; GFX9-NEXT:    v_log_f32_e32 v0, v0
431; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
432; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
433; GFX9-NEXT:    v_exp_f32_e32 v0, v0
434; GFX9-NEXT:    s_setpc_b64 s[30:31]
435  %fabs.y = call float @llvm.fabs.f32(float %y)
436  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
437  ret float %pow
438}
439
; f32 llvm.pow with llvm.fabs applied to both %x and %y. On every target the
; checks expect the mask to be materialized once into s4 with
; "s_brev_b32 s4, -2" (the bit-reverse of -2, i.e. 0x7fffffff) and then used by
; two v_and_b32 instructions, one for each operand.
440define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
441; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
442; GFX6:       ; %bb.0:
443; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444; GFX6-NEXT:    s_brev_b32 s4, -2
445; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
446; GFX6-NEXT:    v_log_f32_e32 v0, v0
447; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
448; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
449; GFX6-NEXT:    v_exp_f32_e32 v0, v0
450; GFX6-NEXT:    s_setpc_b64 s[30:31]
451;
452; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
453; GFX8:       ; %bb.0:
454; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455; GFX8-NEXT:    s_brev_b32 s4, -2
456; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
457; GFX8-NEXT:    v_log_f32_e32 v0, v0
458; GFX8-NEXT:    v_and_b32_e32 v1, s4, v1
459; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
460; GFX8-NEXT:    v_exp_f32_e32 v0, v0
461; GFX8-NEXT:    s_setpc_b64 s[30:31]
462;
463; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
464; GFX9:       ; %bb.0:
465; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; GFX9-NEXT:    s_brev_b32 s4, -2
467; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
468; GFX9-NEXT:    v_log_f32_e32 v0, v0
469; GFX9-NEXT:    v_and_b32_e32 v1, s4, v1
470; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
471; GFX9-NEXT:    v_exp_f32_e32 v0, v0
472; GFX9-NEXT:    s_setpc_b64 s[30:31]
473  %fabs.x = call float @llvm.fabs.f32(float %x)
474  %fabs.y = call float @llvm.fabs.f32(float %y)
475  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
476  ret float %pow
477}
478
; amdgpu_ps variant of f32 llvm.pow where %x is marked inreg and %y is not.
; The checks expect v_log_f32 to read s0 directly and write v1, the multiply to
; combine that with v0, and the body to end at the shader part epilog comment
; with no s_waitcnt or s_setpc_b64.
479define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
480; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
481; GFX6:       ; %bb.0:
482; GFX6-NEXT:    v_log_f32_e32 v1, s0
483; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
484; GFX6-NEXT:    v_exp_f32_e32 v0, v0
485; GFX6-NEXT:    ; return to shader part epilog
486;
487; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
488; GFX8:       ; %bb.0:
489; GFX8-NEXT:    v_log_f32_e32 v1, s0
490; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
491; GFX8-NEXT:    v_exp_f32_e32 v0, v0
492; GFX8-NEXT:    ; return to shader part epilog
493;
494; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
495; GFX9:       ; %bb.0:
496; GFX9-NEXT:    v_log_f32_e32 v1, s0
497; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
498; GFX9-NEXT:    v_exp_f32_e32 v0, v0
499; GFX9-NEXT:    ; return to shader part epilog
500  %pow = call float @llvm.pow.f32(float %x, float %y)
501  ret float %pow
502}
503
; amdgpu_ps variant of f32 llvm.pow where %y is marked inreg and %x is not.
; The checks expect v_log_f32 to operate on v0 and v_mul_legacy_f32 to take s0
; directly as its first source operand, with no copy into a VGPR.
504define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
505; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
506; GFX6:       ; %bb.0:
507; GFX6-NEXT:    v_log_f32_e32 v0, v0
508; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
509; GFX6-NEXT:    v_exp_f32_e32 v0, v0
510; GFX6-NEXT:    ; return to shader part epilog
511;
512; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
513; GFX8:       ; %bb.0:
514; GFX8-NEXT:    v_log_f32_e32 v0, v0
515; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
516; GFX8-NEXT:    v_exp_f32_e32 v0, v0
517; GFX8-NEXT:    ; return to shader part epilog
518;
519; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
520; GFX9:       ; %bb.0:
521; GFX9-NEXT:    v_log_f32_e32 v0, v0
522; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
523; GFX9-NEXT:    v_exp_f32_e32 v0, v0
524; GFX9-NEXT:    ; return to shader part epilog
525  %pow = call float @llvm.pow.f32(float %x, float %y)
526  ret float %pow
527}
528
; amdgpu_ps variant of f32 llvm.pow where both %x and %y are marked inreg.
; The checks expect v_log_f32 to read s0 and v_mul_legacy_f32 to read s1
; directly, with the result produced in v0.
529define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
530; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
531; GFX6:       ; %bb.0:
532; GFX6-NEXT:    v_log_f32_e32 v0, s0
533; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
534; GFX6-NEXT:    v_exp_f32_e32 v0, v0
535; GFX6-NEXT:    ; return to shader part epilog
536;
537; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
538; GFX8:       ; %bb.0:
539; GFX8-NEXT:    v_log_f32_e32 v0, s0
540; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
541; GFX8-NEXT:    v_exp_f32_e32 v0, v0
542; GFX8-NEXT:    ; return to shader part epilog
543;
544; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
545; GFX9:       ; %bb.0:
546; GFX9-NEXT:    v_log_f32_e32 v0, s0
547; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
548; GFX9-NEXT:    v_exp_f32_e32 v0, v0
549; GFX9-NEXT:    ; return to shader part epilog
550  %pow = call float @llvm.pow.f32(float %x, float %y)
551  ret float %pow
552}
553
554declare half @llvm.pow.f16(half, half)
555declare float @llvm.pow.f32(float, float)
556declare double @llvm.pow.f64(double, double)
557
558declare half @llvm.fabs.f16(half)
559declare float @llvm.fabs.f32(float)
560
561declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
562declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
563