1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5
; Baseline case: scalar f32 llvm.pow with plain arguments. On all three
; targets the checks expect the call to become v_log_f32 on x, a
; v_mul_legacy_f32 by y, and then v_exp_f32 on the product.
6define float @v_pow_f32(float %x, float %y) {
7; GFX6-LABEL: v_pow_f32:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX6-NEXT:    v_log_f32_e32 v0, v0
11; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
12; GFX6-NEXT:    v_exp_f32_e32 v0, v0
13; GFX6-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX8-LABEL: v_pow_f32:
16; GFX8:       ; %bb.0:
17; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX8-NEXT:    v_log_f32_e32 v0, v0
19; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
20; GFX8-NEXT:    v_exp_f32_e32 v0, v0
21; GFX8-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX9-LABEL: v_pow_f32:
24; GFX9:       ; %bb.0:
25; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX9-NEXT:    v_log_f32_e32 v0, v0
27; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
28; GFX9-NEXT:    v_exp_f32_e32 v0, v0
29; GFX9-NEXT:    s_setpc_b64 s[30:31]
30  %pow = call float @llvm.pow.f32(float %x, float %y)
31  ret float %pow
32}
33
; <2 x float> llvm.pow. The checks expect the scalar f32 sequence once per
; element: x in v0/v1, y in v2/v3, giving two v_log_f32, two
; v_mul_legacy_f32 and two v_exp_f32 on every target.
34define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
35; GFX6-LABEL: v_pow_v2f32:
36; GFX6:       ; %bb.0:
37; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; GFX6-NEXT:    v_log_f32_e32 v0, v0
39; GFX6-NEXT:    v_log_f32_e32 v1, v1
40; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v2
41; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v3
42; GFX6-NEXT:    v_exp_f32_e32 v0, v0
43; GFX6-NEXT:    v_exp_f32_e32 v1, v1
44; GFX6-NEXT:    s_setpc_b64 s[30:31]
45;
46; GFX8-LABEL: v_pow_v2f32:
47; GFX8:       ; %bb.0:
48; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GFX8-NEXT:    v_log_f32_e32 v0, v0
50; GFX8-NEXT:    v_log_f32_e32 v1, v1
51; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v2
52; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v1, v3
53; GFX8-NEXT:    v_exp_f32_e32 v0, v0
54; GFX8-NEXT:    v_exp_f32_e32 v1, v1
55; GFX8-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX9-LABEL: v_pow_v2f32:
58; GFX9:       ; %bb.0:
59; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    v_log_f32_e32 v0, v0
61; GFX9-NEXT:    v_log_f32_e32 v1, v1
62; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v2
63; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v1, v3
64; GFX9-NEXT:    v_exp_f32_e32 v0, v0
65; GFX9-NEXT:    v_exp_f32_e32 v1, v1
66; GFX9-NEXT:    s_setpc_b64 s[30:31]
67  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
68  ret <2 x float> %pow
69}
70
; Scalar f16 llvm.pow. The expected code differs by target:
;  - GFX6 checks convert both operands to f32, run the f32
;    log / mul_legacy / exp sequence, and convert the result back to f16.
;  - GFX8 and GFX9 checks use v_log_f16 and v_exp_f16 directly, but still
;    convert to f32 around the v_mul_legacy_f32 and back to f16 afterwards.
71define half @v_pow_f16(half %x, half %y) {
72; GFX6-LABEL: v_pow_f16:
73; GFX6:       ; %bb.0:
74; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
76; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
77; GFX6-NEXT:    v_log_f32_e32 v0, v0
78; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
79; GFX6-NEXT:    v_exp_f32_e32 v0, v0
80; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
81; GFX6-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX8-LABEL: v_pow_f16:
84; GFX8:       ; %bb.0:
85; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX8-NEXT:    v_log_f16_e32 v0, v0
87; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
88; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
89; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
90; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
91; GFX8-NEXT:    v_exp_f16_e32 v0, v0
92; GFX8-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX9-LABEL: v_pow_f16:
95; GFX9:       ; %bb.0:
96; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX9-NEXT:    v_log_f16_e32 v0, v0
98; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
99; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
100; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
101; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
102; GFX9-NEXT:    v_exp_f16_e32 v0, v0
103; GFX9-NEXT:    s_setpc_b64 s[30:31]
104  %pow = call half @llvm.pow.f16(half %x, half %y)
105  ret half %pow
106}
107
; <2 x half> llvm.pow with plain operands.
;  - GFX6 checks treat the four halves as separate registers v0..v3, widen
;    each to f32, run the f32 sequence per element and narrow both results.
;  - GFX8 and GFX9 checks take x packed in v0 and y packed in v1; the high
;    half is read through SDWA src0_sel:WORD_1. The multiply is still done
;    in f32 via v_mul_legacy_f32.
;  - The two results are repacked into v0: on GFX8 with a 16-bit shift plus
;    v_or_b32_sdwa, on GFX9 with v_exp_f16_sdwa writing WORD_1 followed by
;    v_and_or_b32 with a 0xffff mask.
108define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
109; GFX6-LABEL: v_pow_v2f16:
110; GFX6:       ; %bb.0:
111; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
113; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
114; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
115; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
116; GFX6-NEXT:    v_log_f32_e32 v0, v0
117; GFX6-NEXT:    v_log_f32_e32 v1, v1
118; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v2
119; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v3
120; GFX6-NEXT:    v_exp_f32_e32 v0, v0
121; GFX6-NEXT:    v_exp_f32_e32 v1, v1
122; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
123; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
124; GFX6-NEXT:    s_setpc_b64 s[30:31]
125;
126; GFX8-LABEL: v_pow_v2f16:
127; GFX8:       ; %bb.0:
128; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GFX8-NEXT:    v_log_f16_e32 v2, v0
130; GFX8-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
131; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v1
132; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
133; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
134; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
135; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
136; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
137; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
138; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v2
139; GFX8-NEXT:    v_mov_b32_e32 v2, 16
140; GFX8-NEXT:    v_exp_f16_e32 v0, v0
141; GFX8-NEXT:    v_exp_f16_e32 v1, v1
142; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
143; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
144; GFX8-NEXT:    s_setpc_b64 s[30:31]
145;
146; GFX9-LABEL: v_pow_v2f16:
147; GFX9:       ; %bb.0:
148; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX9-NEXT:    v_log_f16_e32 v2, v0
150; GFX9-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
151; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v1
152; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
153; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
154; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
155; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
156; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
157; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
158; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
159; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
160; GFX9-NEXT:    v_exp_f16_e32 v1, v2
161; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
162; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
163; GFX9-NEXT:    s_setpc_b64 s[30:31]
164  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
165  ret <2 x half> %pow
166}
167
; <2 x half> llvm.pow where the base %x is negated first.
; In the expected output the fneg is not folded into a source modifier:
; every target shows an explicit v_xor_b32 with 0x80008000 (the sign bit
; of each 16-bit half) applied to the packed x value.
;  - GFX6 checks first pack the two x halves (shift / and / or), xor the
;    packed value, then split it again before widening to f32.
;  - GFX8 and GFX9 checks xor v0 and then follow the same sequence as the
;    plain v_pow_v2f16 test.
168define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
169; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
170; GFX6:       ; %bb.0:
171; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
173; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
174; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
175; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
176; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
177; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
178; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
179; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
180; GFX6-NEXT:    v_log_f32_e32 v1, v1
181; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
182; GFX6-NEXT:    v_log_f32_e32 v0, v0
183; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v2
184; GFX6-NEXT:    v_exp_f32_e32 v1, v1
185; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v3
186; GFX6-NEXT:    v_exp_f32_e32 v2, v0
187; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v1
188; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v2
189; GFX6-NEXT:    s_setpc_b64 s[30:31]
190;
191; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
192; GFX8:       ; %bb.0:
193; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
195; GFX8-NEXT:    v_log_f16_e32 v2, v0
196; GFX8-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
197; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v1
198; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
199; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
200; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
201; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
202; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
203; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
204; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v2
205; GFX8-NEXT:    v_mov_b32_e32 v2, 16
206; GFX8-NEXT:    v_exp_f16_e32 v0, v0
207; GFX8-NEXT:    v_exp_f16_e32 v1, v1
208; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
209; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
210; GFX8-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
213; GFX9:       ; %bb.0:
214; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
216; GFX9-NEXT:    v_log_f16_e32 v2, v0
217; GFX9-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
218; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v1
219; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
220; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
221; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
222; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
223; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
224; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
225; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
226; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
227; GFX9-NEXT:    v_exp_f16_e32 v1, v2
228; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
229; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
230; GFX9-NEXT:    s_setpc_b64 s[30:31]
231  %x.fneg = fneg <2 x half> %x
232  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
233  ret <2 x half> %pow
234}
235
; <2 x half> llvm.pow where the exponent %y is negated first.
; As with the fneg_lhs variant, the checks expect an explicit v_xor_b32
; with 0x80008000 rather than a source modifier, this time on the packed
; y value: GFX6 packs v2/v3 before the xor and unpacks afterwards; GFX8 and
; GFX9 xor v1 directly.
236define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
237; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
238; GFX6:       ; %bb.0:
239; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
241; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
242; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
243; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
244; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
245; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
246; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
247; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
248; GFX6-NEXT:    v_log_f32_e32 v0, v0
249; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
250; GFX6-NEXT:    v_log_f32_e32 v1, v1
251; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v2
252; GFX6-NEXT:    v_exp_f32_e32 v0, v0
253; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v3
254; GFX6-NEXT:    v_exp_f32_e32 v1, v1
255; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
256; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
257; GFX6-NEXT:    s_setpc_b64 s[30:31]
258;
259; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
260; GFX8:       ; %bb.0:
261; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262; GFX8-NEXT:    v_log_f16_e32 v2, v0
263; GFX8-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
264; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
265; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v1
266; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
267; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
268; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
269; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
270; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
271; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
272; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v2
273; GFX8-NEXT:    v_mov_b32_e32 v2, 16
274; GFX8-NEXT:    v_exp_f16_e32 v0, v0
275; GFX8-NEXT:    v_exp_f16_e32 v1, v1
276; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
277; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
278; GFX8-NEXT:    s_setpc_b64 s[30:31]
279;
280; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
281; GFX9:       ; %bb.0:
282; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; GFX9-NEXT:    v_log_f16_e32 v2, v0
284; GFX9-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
285; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
286; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v1
287; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
288; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
289; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
290; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
291; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
292; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
293; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
294; GFX9-NEXT:    v_exp_f16_e32 v1, v2
295; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
296; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
297; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
298; GFX9-NEXT:    s_setpc_b64 s[30:31]
299  %y.fneg = fneg <2 x half> %y
300  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
301  ret <2 x half> %pow
302}
303
; <2 x half> llvm.pow with both the base and the exponent negated.
; The checks expect the 0x80008000 sign mask to be materialized once in s4
; and reused by two v_xor_b32 instructions, one per packed operand. GFX6
; additionally keeps the 0xffff packing mask in v4 and uses it for both
; operands. The rest follows the single-fneg variants.
304define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
305; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffff
309; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
310; GFX6-NEXT:    v_and_b32_e32 v0, v0, v4
311; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
312; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
313; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
314; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
315; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
316; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
317; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
318; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
319; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
320; GFX6-NEXT:    v_xor_b32_e32 v1, s4, v1
321; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
322; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
323; GFX6-NEXT:    v_log_f32_e32 v0, v0
324; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
325; GFX6-NEXT:    v_log_f32_e32 v2, v2
326; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
327; GFX6-NEXT:    v_exp_f32_e32 v0, v0
328; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v2, v3
329; GFX6-NEXT:    v_exp_f32_e32 v1, v1
330; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
331; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
332; GFX6-NEXT:    s_setpc_b64 s[30:31]
333;
334; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
335; GFX8:       ; %bb.0:
336; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
338; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
339; GFX8-NEXT:    v_log_f16_e32 v2, v0
340; GFX8-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
341; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
342; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v1
343; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
344; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
345; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
346; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
347; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
348; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
349; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v2
350; GFX8-NEXT:    v_mov_b32_e32 v2, 16
351; GFX8-NEXT:    v_exp_f16_e32 v0, v0
352; GFX8-NEXT:    v_exp_f16_e32 v1, v1
353; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
354; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
355; GFX8-NEXT:    s_setpc_b64 s[30:31]
356;
357; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
358; GFX9:       ; %bb.0:
359; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360; GFX9-NEXT:    s_mov_b32 s4, 0x80008000
361; GFX9-NEXT:    v_xor_b32_e32 v0, s4, v0
362; GFX9-NEXT:    v_log_f16_e32 v2, v0
363; GFX9-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
364; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
365; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v1
366; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
367; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
368; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
369; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v2, v3
370; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
371; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
372; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
373; GFX9-NEXT:    v_exp_f16_e32 v1, v2
374; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
375; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
376; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
377; GFX9-NEXT:    s_setpc_b64 s[30:31]
378  %x.fneg = fneg <2 x half> %x
379  %y.fneg = fneg <2 x half> %y
380  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
381  ret <2 x half> %pow
382}
383
384; FIXME: The f64 variant below is commented out (disabled); re-enable it once this case can be tested.
385; define double @v_pow_f64(double %x, double %y) {
386;   %pow = call double @llvm.pow.f64(double %x, double %y)
387;   ret double %pow
388; }
389
; f32 llvm.pow whose base is wrapped in llvm.fabs. The checks expect the
; fabs to fold into the log as a |v0| source modifier (v_log_f32_e64)
; instead of appearing as a separate instruction.
390define float @v_pow_f32_fabs_lhs(float %x, float %y) {
391; GFX6-LABEL: v_pow_f32_fabs_lhs:
392; GFX6:       ; %bb.0:
393; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
395; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
396; GFX6-NEXT:    v_exp_f32_e32 v0, v0
397; GFX6-NEXT:    s_setpc_b64 s[30:31]
398;
399; GFX8-LABEL: v_pow_f32_fabs_lhs:
400; GFX8:       ; %bb.0:
401; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
403; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
404; GFX8-NEXT:    v_exp_f32_e32 v0, v0
405; GFX8-NEXT:    s_setpc_b64 s[30:31]
406;
407; GFX9-LABEL: v_pow_f32_fabs_lhs:
408; GFX9:       ; %bb.0:
409; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
411; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
412; GFX9-NEXT:    v_exp_f32_e32 v0, v0
413; GFX9-NEXT:    s_setpc_b64 s[30:31]
414  %fabs.x = call float @llvm.fabs.f32(float %x)
415  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
416  ret float %pow
417}
418
; f32 llvm.pow whose exponent is wrapped in llvm.fabs. The checks expect
; the fabs to fold into the multiply as a |v1| source modifier
; (v_mul_legacy_f32_e64); log and exp are unchanged from the plain case.
419define float @v_pow_f32_fabs_rhs(float %x, float %y) {
420; GFX6-LABEL: v_pow_f32_fabs_rhs:
421; GFX6:       ; %bb.0:
422; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423; GFX6-NEXT:    v_log_f32_e32 v0, v0
424; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
425; GFX6-NEXT:    v_exp_f32_e32 v0, v0
426; GFX6-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX8-LABEL: v_pow_f32_fabs_rhs:
429; GFX8:       ; %bb.0:
430; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX8-NEXT:    v_log_f32_e32 v0, v0
432; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
433; GFX8-NEXT:    v_exp_f32_e32 v0, v0
434; GFX8-NEXT:    s_setpc_b64 s[30:31]
435;
436; GFX9-LABEL: v_pow_f32_fabs_rhs:
437; GFX9:       ; %bb.0:
438; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439; GFX9-NEXT:    v_log_f32_e32 v0, v0
440; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
441; GFX9-NEXT:    v_exp_f32_e32 v0, v0
442; GFX9-NEXT:    s_setpc_b64 s[30:31]
443  %fabs.y = call float @llvm.fabs.f32(float %y)
444  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
445  ret float %pow
446}
447
; f32 llvm.pow with llvm.fabs applied to both operands. The checks expect
; both folds at once: |v0| on v_log_f32_e64 and |v1| on
; v_mul_legacy_f32_e64, with no standalone fabs instruction.
448define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
449; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
450; GFX6:       ; %bb.0:
451; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
453; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
454; GFX6-NEXT:    v_exp_f32_e32 v0, v0
455; GFX6-NEXT:    s_setpc_b64 s[30:31]
456;
457; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
458; GFX8:       ; %bb.0:
459; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
461; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
462; GFX8-NEXT:    v_exp_f32_e32 v0, v0
463; GFX8-NEXT:    s_setpc_b64 s[30:31]
464;
465; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
466; GFX9:       ; %bb.0:
467; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
469; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, v0, |v1|
470; GFX9-NEXT:    v_exp_f32_e32 v0, v0
471; GFX9-NEXT:    s_setpc_b64 s[30:31]
472  %fabs.x = call float @llvm.fabs.f32(float %x)
473  %fabs.y = call float @llvm.fabs.f32(float %y)
474  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
475  ret float %pow
476}
477
; amdgpu_ps variant with the base marked inreg. In the expected output x is
; read straight from s0 by v_log_f32 and y is in v0; the log result goes to
; v1 so that v0 is still available for the multiply. There is no s_waitcnt
; or s_setpc_b64 here; the function ends with the
; "return to shader part epilog" marker instead.
478define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
479; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
480; GFX6:       ; %bb.0:
481; GFX6-NEXT:    v_log_f32_e32 v1, s0
482; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
483; GFX6-NEXT:    v_exp_f32_e32 v0, v0
484; GFX6-NEXT:    ; return to shader part epilog
485;
486; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
487; GFX8:       ; %bb.0:
488; GFX8-NEXT:    v_log_f32_e32 v1, s0
489; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
490; GFX8-NEXT:    v_exp_f32_e32 v0, v0
491; GFX8-NEXT:    ; return to shader part epilog
492;
493; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
494; GFX9:       ; %bb.0:
495; GFX9-NEXT:    v_log_f32_e32 v1, s0
496; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
497; GFX9-NEXT:    v_exp_f32_e32 v0, v0
498; GFX9-NEXT:    ; return to shader part epilog
499  %pow = call float @llvm.pow.f32(float %x, float %y)
500  ret float %pow
501}
502
; amdgpu_ps variant with the exponent marked inreg. In the expected output
; x is in v0 and y is read from s0, which appears as the first source
; operand of v_mul_legacy_f32_e32 (operands swapped relative to the
; all-VGPR test, where the log result comes first).
503define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
504; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
505; GFX6:       ; %bb.0:
506; GFX6-NEXT:    v_log_f32_e32 v0, v0
507; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
508; GFX6-NEXT:    v_exp_f32_e32 v0, v0
509; GFX6-NEXT:    ; return to shader part epilog
510;
511; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
512; GFX8:       ; %bb.0:
513; GFX8-NEXT:    v_log_f32_e32 v0, v0
514; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
515; GFX8-NEXT:    v_exp_f32_e32 v0, v0
516; GFX8-NEXT:    ; return to shader part epilog
517;
518; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
519; GFX9:       ; %bb.0:
520; GFX9-NEXT:    v_log_f32_e32 v0, v0
521; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
522; GFX9-NEXT:    v_exp_f32_e32 v0, v0
523; GFX9-NEXT:    ; return to shader part epilog
524  %pow = call float @llvm.pow.f32(float %x, float %y)
525  ret float %pow
526}
527
; amdgpu_ps variant with both operands marked inreg. In the expected output
; x is read from s0 by v_log_f32 and y from s1 by v_mul_legacy_f32_e32; no
; copies of the SGPR inputs into VGPRs are expected.
528define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
529; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
530; GFX6:       ; %bb.0:
531; GFX6-NEXT:    v_log_f32_e32 v0, s0
532; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
533; GFX6-NEXT:    v_exp_f32_e32 v0, v0
534; GFX6-NEXT:    ; return to shader part epilog
535;
536; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
537; GFX8:       ; %bb.0:
538; GFX8-NEXT:    v_log_f32_e32 v0, s0
539; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
540; GFX8-NEXT:    v_exp_f32_e32 v0, v0
541; GFX8-NEXT:    ; return to shader part epilog
542;
543; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
544; GFX9:       ; %bb.0:
545; GFX9-NEXT:    v_log_f32_e32 v0, s0
546; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
547; GFX9-NEXT:    v_exp_f32_e32 v0, v0
548; GFX9-NEXT:    ; return to shader part epilog
549  %pow = call float @llvm.pow.f32(float %x, float %y)
550  ret float %pow
551}
552
; f32 llvm.pow with a negated base. The checks expect the fneg to fold
; into the log as a -v0 source modifier (v_log_f32_e64); no separate
; negate or xor instruction is expected.
553define float @v_pow_f32_fneg_lhs(float %x, float %y) {
554; GFX6-LABEL: v_pow_f32_fneg_lhs:
555; GFX6:       ; %bb.0:
556; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557; GFX6-NEXT:    v_log_f32_e64 v0, -v0
558; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
559; GFX6-NEXT:    v_exp_f32_e32 v0, v0
560; GFX6-NEXT:    s_setpc_b64 s[30:31]
561;
562; GFX8-LABEL: v_pow_f32_fneg_lhs:
563; GFX8:       ; %bb.0:
564; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX8-NEXT:    v_log_f32_e64 v0, -v0
566; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
567; GFX8-NEXT:    v_exp_f32_e32 v0, v0
568; GFX8-NEXT:    s_setpc_b64 s[30:31]
569;
570; GFX9-LABEL: v_pow_f32_fneg_lhs:
571; GFX9:       ; %bb.0:
572; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573; GFX9-NEXT:    v_log_f32_e64 v0, -v0
574; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
575; GFX9-NEXT:    v_exp_f32_e32 v0, v0
576; GFX9-NEXT:    s_setpc_b64 s[30:31]
577  %neg.x = fneg float %x
578  %pow = call float @llvm.pow.f32(float %neg.x, float %y)
579  ret float %pow
580}
581
; f32 llvm.pow with a negated exponent. The checks expect the fneg to fold
; into the multiply as a -v1 source modifier (v_mul_legacy_f32_e64); log
; and exp are unchanged from the plain case.
582define float @v_pow_f32_fneg_rhs(float %x, float %y) {
583; GFX6-LABEL: v_pow_f32_fneg_rhs:
584; GFX6:       ; %bb.0:
585; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586; GFX6-NEXT:    v_log_f32_e32 v0, v0
587; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
588; GFX6-NEXT:    v_exp_f32_e32 v0, v0
589; GFX6-NEXT:    s_setpc_b64 s[30:31]
590;
591; GFX8-LABEL: v_pow_f32_fneg_rhs:
592; GFX8:       ; %bb.0:
593; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
594; GFX8-NEXT:    v_log_f32_e32 v0, v0
595; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
596; GFX8-NEXT:    v_exp_f32_e32 v0, v0
597; GFX8-NEXT:    s_setpc_b64 s[30:31]
598;
599; GFX9-LABEL: v_pow_f32_fneg_rhs:
600; GFX9:       ; %bb.0:
601; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602; GFX9-NEXT:    v_log_f32_e32 v0, v0
603; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
604; GFX9-NEXT:    v_exp_f32_e32 v0, v0
605; GFX9-NEXT:    s_setpc_b64 s[30:31]
606  %neg.y = fneg float %y
607  %pow = call float @llvm.pow.f32(float %x, float %neg.y)
608  ret float %pow
609}
610
; Declarations of the intrinsics called by the test functions in this file.
; Scalar pow overloads.
611declare half @llvm.pow.f16(half, half)
612declare float @llvm.pow.f32(float, float)
; llvm.pow.f64 is only referenced by the commented-out v_pow_f64 test.
613declare double @llvm.pow.f64(double, double)
614
; fabs overloads. Only the f32 form is called by the functions visible in
; this file; llvm.fabs.f16 has no call site here.
615declare half @llvm.fabs.f16(half)
616declare float @llvm.fabs.f32(float)
617
; Vector pow overloads.
618declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
619declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
620