1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
; llvm.powi.f16 with a variable i32 exponent. The half operand and result are
; carried through the i16 argument/return type via bitcasts. The checks expect
; a conversion to f32, then v_log_f32, v_mul_legacy_f32 and v_exp_f32, and
; finally a conversion back to f16.
5define i16 @v_powi_f16(i16 %l, i32 %r) {
6; GCN-LABEL: v_powi_f16:
7; GCN:       ; %bb.0:
8; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
10; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
11; GCN-NEXT:    v_log_f32_e32 v0, v0
12; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
13; GCN-NEXT:    v_exp_f32_e32 v0, v0
14; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
15; GCN-NEXT:    s_setpc_b64 s[30:31]
16  %l.cast = bitcast i16 %l to half
17  %res = call half @llvm.powi.f16(half %l.cast, i32 %r)
18  %res.cast = bitcast half %res to i16
19  ret i16 %res.cast
20}
21
; llvm.powi.f32 with a variable i32 exponent. The checks expect the same
; sequence on both targets: v_log_f32, v_cvt_f32_i32, v_mul_legacy_f32, then
; v_exp_f32.
22define float @v_powi_f32(float %l, i32 %r) {
23; GCN-LABEL: v_powi_f32:
24; GCN:       ; %bb.0:
25; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GCN-NEXT:    v_log_f32_e32 v0, v0
27; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
28; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
29; GCN-NEXT:    v_exp_f32_e32 v0, v0
30; GCN-NEXT:    s_setpc_b64 s[30:31]
31  %res = call float @llvm.powi.f32(float %l, i32 %r)
32  ret float %res
33}
34
; llvm.powi.f32 with the constant exponent 0. The checks expect the call to
; collapse to a single v_mov_b32 of the constant 1.0 into v0.
35define float @v_powi_0_f32(float %l) {
36; GCN-LABEL: v_powi_0_f32:
37; GCN:       ; %bb.0:
38; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
40; GCN-NEXT:    s_setpc_b64 s[30:31]
41  %res = call float @llvm.powi.f32(float %l, i32 0)
42  ret float %res
43}
44
; llvm.powi.f32 with the constant exponent 1. The checks expect only the
; s_waitcnt and the s_setpc_b64 return, with no arithmetic in between.
45define float @v_powi_1_f32(float %l) {
46; GCN-LABEL: v_powi_1_f32:
47; GCN:       ; %bb.0:
48; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49; GCN-NEXT:    s_setpc_b64 s[30:31]
50  %res = call float @llvm.powi.f32(float %l, i32 1)
51  ret float %res
52}
53
; llvm.powi.f32 with the constant exponent -1. The checks expect a division
; sequence against the constant 1.0 (v_div_scale_f32, v_rcp_f32, v_fma_f32
; refinement steps, v_div_fmas_f32, v_div_fixup_f32) and no multiply of the
; input by itself. GFX7 and GFX8 get separate check blocks because the
; instruction order and register numbering differ between the two.
54define float @v_powi_neg1_f32(float %l) {
55; GFX7-LABEL: v_powi_neg1_f32:
56; GFX7:       ; %bb.0:
57; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
59; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
60; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
61; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
62; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
63; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
64; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
65; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
66; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
67; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
68; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
69; GFX7-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX8-LABEL: v_powi_neg1_f32:
72; GFX8:       ; %bb.0:
73; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
75; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
76; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
77; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
78; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
79; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
80; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
81; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
82; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
83; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
84; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
85; GFX8-NEXT:    s_setpc_b64 s[30:31]
86  %res = call float @llvm.powi.f32(float %l, i32 -1)
87  ret float %res
88}
89
; llvm.powi.f32 with the constant exponent 2. The checks expect a single
; v_mul_f32 of v0 with itself.
90define float @v_powi_2_f32(float %l) {
91; GCN-LABEL: v_powi_2_f32:
92; GCN:       ; %bb.0:
93; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
95; GCN-NEXT:    s_setpc_b64 s[30:31]
96  %res = call float @llvm.powi.f32(float %l, i32 2)
97  ret float %res
98}
99
; llvm.powi.f32 with the constant exponent -2. The checks expect one
; v_mul_f32 of v0 with itself, followed by a division sequence against the
; constant 1.0 (v_div_scale_f32 ... v_div_fixup_f32). GFX7 and GFX8 get
; separate check blocks because the instruction order and register numbering
; of the division sequence differ between the two.
100define float @v_powi_neg2_f32(float %l) {
101; GFX7-LABEL: v_powi_neg2_f32:
102; GFX7:       ; %bb.0:
103; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
105; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
106; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
107; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
108; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
109; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
110; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
111; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
112; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
113; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
114; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
115; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
116; GFX7-NEXT:    s_setpc_b64 s[30:31]
117;
118; GFX8-LABEL: v_powi_neg2_f32:
119; GFX8:       ; %bb.0:
120; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
122; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
123; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
124; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
125; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
126; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
127; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
128; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
129; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
130; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
131; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
132; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
133; GFX8-NEXT:    s_setpc_b64 s[30:31]
134  %res = call float @llvm.powi.f32(float %l, i32 -2)
135  ret float %res
136}
137
; llvm.powi.f32 with the constant exponent 4. The checks expect two
; back-to-back v_mul_f32 instructions, each multiplying v0 by itself.
138define float @v_powi_4_f32(float %l) {
139; GCN-LABEL: v_powi_4_f32:
140; GCN:       ; %bb.0:
141; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
143; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
144; GCN-NEXT:    s_setpc_b64 s[30:31]
145  %res = call float @llvm.powi.f32(float %l, i32 4)
146  ret float %res
147}
148
; llvm.powi.f32 with the constant exponent 8. The checks expect three
; back-to-back v_mul_f32 instructions, each multiplying v0 by itself.
149define float @v_powi_8_f32(float %l) {
150; GCN-LABEL: v_powi_8_f32:
151; GCN:       ; %bb.0:
152; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
154; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
155; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
156; GCN-NEXT:    s_setpc_b64 s[30:31]
157  %res = call float @llvm.powi.f32(float %l, i32 8)
158  ret float %res
159}
160
; llvm.powi.f32 with the constant exponent 16. The checks expect four
; back-to-back v_mul_f32 instructions, each multiplying v0 by itself.
161define float @v_powi_16_f32(float %l) {
162; GCN-LABEL: v_powi_16_f32:
163; GCN:       ; %bb.0:
164; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
166; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
167; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
168; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
169; GCN-NEXT:    s_setpc_b64 s[30:31]
170  %res = call float @llvm.powi.f32(float %l, i32 16)
171  ret float %res
172}
173
; llvm.powi.f32 with the constant exponent 128. The checks expect seven
; back-to-back v_mul_f32 instructions, each multiplying v0 by itself.
174define float @v_powi_128_f32(float %l) {
175; GCN-LABEL: v_powi_128_f32:
176; GCN:       ; %bb.0:
177; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
179; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
180; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
181; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
182; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
183; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
184; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
185; GCN-NEXT:    s_setpc_b64 s[30:31]
186  %res = call float @llvm.powi.f32(float %l, i32 128)
187  ret float %res
188}
189
; llvm.powi.f32 with the constant exponent -128. The checks expect seven
; v_mul_f32 instructions, each multiplying v0 by itself, followed by a
; division sequence against the constant 1.0 (v_div_scale_f32 ...
; v_div_fixup_f32). GFX7 and GFX8 get separate check blocks because the
; instruction order and register numbering of the division sequence differ
; between the two.
190define float @v_powi_neg128_f32(float %l) {
191; GFX7-LABEL: v_powi_neg128_f32:
192; GFX7:       ; %bb.0:
193; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
195; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
196; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
197; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
198; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
199; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
200; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
201; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
202; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
203; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
204; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
205; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
206; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
207; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
208; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
209; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
210; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
211; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
212; GFX7-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX8-LABEL: v_powi_neg128_f32:
215; GFX8:       ; %bb.0:
216; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
218; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
219; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
220; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
221; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
222; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
223; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
224; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
225; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
226; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
227; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
228; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
229; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
230; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
231; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
232; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
233; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
234; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
235; GFX8-NEXT:    s_setpc_b64 s[30:31]
236  %res = call float @llvm.powi.f32(float %l, i32 -128)
237  ret float %res
238}
239
240; FIXME: The f64 case is currently broken, so this test is commented out.
241; define double @v_powi_f64(double %l, i32 %r) {
242;   %res = call double @llvm.powi.f64(double %l, i32 %r)
243;   ret double %res
244; }
245
; Declarations of the llvm.powi intrinsic overloads, all sharing attribute
; group #0. The f16 and f32 overloads are called by the functions in this
; file; the f64 overload is referenced only by the commented-out v_powi_f64
; test.
246declare half @llvm.powi.f16(half, i32) #0
247declare float @llvm.powi.f32(float, i32) #0
248declare double @llvm.powi.f64(double, i32) #0
249
; Attribute group applied to every llvm.powi declaration in this file.
250attributes #0 = { nounwind readnone speculatable willreturn }
251