1;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
3;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
4
; Scalar f32 case: calls llvm.exp.f32 on the argument and returns the result.
; All three run configurations expect the same sequence: a v_mul_f32 by the
; literal 0x3fb8aa3b (the f32 bit pattern of ~1.4427, i.e. log2(e)) feeding
; v_exp_f32. Presumably the hardware exp is base-2, hence the pre-multiply --
; confirm against the ISA guide.
; NOTE(review): the three per-target expectation blocks below are textually
; identical, so they could share the common GCN prefix.
5define float @v_exp_f32(float %arg0) {
6; SI-LABEL: v_exp_f32:
7; SI:       ; %bb.0:
8; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
10; SI-NEXT:    v_exp_f32_e32 v0, v0
11; SI-NEXT:    s_setpc_b64 s[30:31]
12;
13; VI-LABEL: v_exp_f32:
14; VI:       ; %bb.0:
15; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; VI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
17; VI-NEXT:    v_exp_f32_e32 v0, v0
18; VI-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX9-LABEL: v_exp_f32:
21; GFX9:       ; %bb.0:
22; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX9-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
24; GFX9-NEXT:    v_exp_f32_e32 v0, v0
25; GFX9-NEXT:    s_setpc_b64 s[30:31]
26  %result = call float @llvm.exp.f32(float %arg0)
27  ret float %result
28}
29
; <2 x float> case: calls llvm.exp.v2f32 and returns the vector result.
; Checked under the shared GCN prefix, so the same expectations apply to every
; run configuration. The constant 0x3fb8aa3b is expected to be materialised
; once in a scalar register (captured as SREG) and reused by both multiplies.
; The multiply registers are matched by regex rather than pinned; the two
; v_exp_f32 results are pinned to v0 and v1.
30define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
31; GCN-LABEL: v_exp_v2f32:
32; GCN:       ; %bb.0:
33; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GCN-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
35; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
36; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
37; GCN-NEXT:    v_exp_f32_e32 v0, v0
38; GCN-NEXT:    v_exp_f32_e32 v1, v1
39; GCN-NEXT:    s_setpc_b64 s[30:31]
40  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
41  ret <2 x float> %result
42}
43
; <3 x float> case: calls llvm.exp.v3f32 and returns the vector result.
; Checked under the shared GCN prefix for every run configuration. Expects one
; scalar load of 0x3fb8aa3b (captured as SREG), three v_mul_f32 by that
; register (operand registers matched by regex), then three v_exp_f32 with
; results pinned to v0, v1 and v2.
44define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
45; GCN-LABEL: v_exp_v3f32:
46; GCN:       ; %bb.0:
47; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GCN-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
49; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
50; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
51; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
52; GCN-NEXT:    v_exp_f32_e32 v0, v0
53; GCN-NEXT:    v_exp_f32_e32 v1, v1
54; GCN-NEXT:    v_exp_f32_e32 v2, v2
55; GCN-NEXT:    s_setpc_b64 s[30:31]
56;
57  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
58  ret <3 x float> %result
59}
60
; <4 x float> case: calls llvm.exp.v4f32 and returns the vector result.
; Expects one scalar load of 0x3fb8aa3b (captured as SREG), four in-place
; v_mul_f32 on v0..v3, then four in-place v_exp_f32 on v0..v3.
; NOTE(review): expectations exist only under the SI prefix, so the fiji and
; gfx900 runs verify nothing for this function. Consider checking it under the
; shared GCN prefix once the output on those targets has been confirmed.
61define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
62; SI-LABEL: v_exp_v4f32:
63; SI:       ; %bb.0:
64; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
66; SI-NEXT:    v_mul_f32_e32 v0, [[SREG]], v0
67; SI-NEXT:    v_mul_f32_e32 v1, [[SREG]], v1
68; SI-NEXT:    v_mul_f32_e32 v2, [[SREG]], v2
69; SI-NEXT:    v_mul_f32_e32 v3, [[SREG]], v3
70; SI-NEXT:    v_exp_f32_e32 v0, v0
71; SI-NEXT:    v_exp_f32_e32 v1, v1
72; SI-NEXT:    v_exp_f32_e32 v2, v2
73; SI-NEXT:    v_exp_f32_e32 v3, v3
74; SI-NEXT:    s_setpc_b64 s[30:31]
75  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
76  ret <4 x float> %result
77}
78
; Scalar f16 case: calls llvm.exp.f16 on the argument and returns the result.
; Expectations differ by target:
;  - Default (SI) run: the value is converted f32 -> f16 -> f32, then handled
;    with the f32 sequence (v_mul_f32 by 0x3fb8aa3b, v_exp_f32). No f16
;    arithmetic instructions appear in these expectations.
;  - fiji (VI) and gfx900 (GFX9) runs: native f16 sequence, v_mul_f16 by the
;    literal 0x3dc5 (the f16 bit pattern of ~1.4424, i.e. log2(e) rounded to
;    half precision) followed by v_exp_f16.
79define half @v_exp_f16(half %arg0) {
80; SI-LABEL: v_exp_f16:
81; SI:       ; %bb.0:
82; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
84; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
85; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
86; SI-NEXT:    v_exp_f32_e32 v0, v0
87; SI-NEXT:    s_setpc_b64 s[30:31]
88;
89; VI-LABEL: v_exp_f16:
90; VI:       ; %bb.0:
91; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
93; VI-NEXT:    v_exp_f16_e32 v0, v0
94; VI-NEXT:    s_setpc_b64 s[30:31]
95;
96; GFX9-LABEL: v_exp_f16:
97; GFX9:       ; %bb.0:
98; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
100; GFX9-NEXT:    v_exp_f16_e32 v0, v0
101; GFX9-NEXT:    s_setpc_b64 s[30:31]
102  %result = call half @llvm.exp.f16(half %arg0)
103  ret half %result
104}
105
; <2 x half> case: calls llvm.exp.v2f16 and returns the vector result.
; Expectations differ by target:
;  - Default (SI) run: each element is converted f32 -> f16 -> f32 and then
;    handled with the f32 sequence (shared 0x3fb8aa3b in SREG, v_mul_f32,
;    v_exp_f32), results in v0 and v1.
;  - fiji (VI) run: 0x3dc5 is loaded into SREG and copied to a VGPR (VREG).
;    The WORD_1 half is multiplied with an SDWA v_mul_f16 and the other half
;    with a plain v_mul_f16; each product gets its own v_exp_f16 (the SDWA
;    form writes WORD_1), and a v_or_b32 merges the two results.
;  - gfx900 (GFX9) run: a single packed v_pk_mul_f16 multiplies both halves,
;    followed by two v_exp_f16 (one reading WORD_1 via SDWA) and a
;    v_and_b32 / v_lshl_or_b32 pair that repacks the results into v0.
106define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
107; SI-LABEL: v_exp_v2f16:
108; SI:       ; %bb.0:
109; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
111; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
112; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
113; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
114; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
115; SI-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
116; SI-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
117; SI-NEXT:    v_exp_f32_e32 v0, v0
118; SI-NEXT:    v_exp_f32_e32 v1, v1
119; SI-NEXT:    s_setpc_b64 s[30:31]
120;
121; VI-LABEL: v_exp_v2f16:
122; VI:       ; %bb.0:
123; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; VI-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
125; VI-NEXT:    v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
126; VI-NEXT:    v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
127; VI-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}}
128; VI-NEXT:    v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
129; VI-NEXT:    v_exp_f16_e32 [[MUL2]], [[MUL2]]
130; VI-NEXT:    v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]]
131; VI-NEXT:    s_setpc_b64 s[30:31]
132;
133; GFX9-LABEL: v_exp_v2f16:
134; GFX9:       ; %bb.0:
135; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX9-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
137; GFX9-NEXT:    v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0]
138; GFX9-NEXT:    v_exp_f16_e32 v1, v0
139; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
140; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
141; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
142; GFX9-NEXT:    s_setpc_b64 s[30:31]
143  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
144  ret <2 x half> %result
145}
146
147; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
148;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
149;   ret <3 x half> %result
150; }
151
; <4 x half> case: calls llvm.exp.v4f16 and returns the vector result.
; Expectations differ by target:
;  - Default (SI) run: all four elements are converted f32 -> f16 -> f32 and
;    handled with the f32 sequence (shared 0x3fb8aa3b in SREG, in-place
;    v_mul_f32 and v_exp_f32 on v0..v3).
;  - fiji (VI) run: 0x3dc5 is loaded into SREG and copied to VREG. Each input
;    dword (v0, v1) gets a plain v_mul_f16 plus an SDWA v_mul_f16 reading
;    WORD_1, four v_exp_f16 follow (the SDWA ones write WORD_1), and two
;    v_or_b32 merge the halves back into v1 and v0.
;    NOTE(review): MUL3 and MUL4 are captured but never reused; the SDWA exp
;    lines hard-code v1 / v0 as their source instead.
;  - gfx900 (GFX9) run: same four multiplies, but the SDWA forms take SREG
;    directly (no VGPR copy is expected). Four plain v_exp_f16 follow, then
;    0xffff is loaded into a VGPR and v_and_b32 / v_lshl_or_b32 pairs repack
;    the results into v0 and v1. Unlike the <2 x half> test in this file, no
;    v_pk_mul_f16 is expected here.
152define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
153; SI-LABEL: v_exp_v4f16:
154; SI:       ; %bb.0:
155; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
157; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
158; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
159; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
160; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
161; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
162; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
163; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
164; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
165; SI-NEXT:    v_mul_f32_e32 v0, [[SREG]], v0
166; SI-NEXT:    v_mul_f32_e32 v1, [[SREG]], v1
167; SI-NEXT:    v_mul_f32_e32 v2, [[SREG]], v2
168; SI-NEXT:    v_mul_f32_e32 v3, [[SREG]], v3
169; SI-NEXT:    v_exp_f32_e32 v0, v0
170; SI-NEXT:    v_exp_f32_e32 v1, v1
171; SI-NEXT:    v_exp_f32_e32 v2, v2
172; SI-NEXT:    v_exp_f32_e32 v3, v3
173; SI-NEXT:    s_setpc_b64 s[30:31]
174;
175; VI-LABEL: v_exp_v4f16:
176; VI:       ; %bb.0:
177; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; VI-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
179; VI-NEXT:    v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
180; VI-NEXT:    v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
181; VI-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
182; VI-NEXT:    v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
183; VI-NEXT:    v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
184; VI-NEXT:    v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
185; VI-NEXT:    v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
186; VI-NEXT:    v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
187; VI-NEXT:    v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
188; VI-NEXT:    v_or_b32_e32 v1, [[EXP1]], [[EXP2]]
189; VI-NEXT:    v_or_b32_e32 v0, [[EXP3]], [[EXP4]]
190; VI-NEXT:    s_setpc_b64 s[30:31]
191;
192; GFX9-LABEL: v_exp_v4f16:
193; GFX9:       ; %bb.0:
194; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195; GFX9-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
196; GFX9-NEXT:    v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
197; GFX9-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
198; GFX9-NEXT:    v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
199; GFX9-NEXT:    v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
200; GFX9-NEXT:    v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
201; GFX9-NEXT:    v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL2]]
202; GFX9-NEXT:    v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL4]]
203; GFX9-NEXT:    v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL3]]
204; GFX9-NEXT:    v_mov_b32_e32 [[VCONST:v[0-9]+]], 0xffff
205; GFX9-NEXT:    v_and_b32_e32 [[AND1:v[0-9]+]], [[VCONST]], [[EXP2]]
206; GFX9-NEXT:    v_and_b32_e32 [[AND2:v[0-9]+]], [[VCONST]], [[EXP1]]
207; GFX9-NEXT:    v_lshl_or_b32 v0, [[EXP3]], 16, [[AND1]]
208; GFX9-NEXT:    v_lshl_or_b32 v1, [[EXP4]], 16, [[AND2]]
209; GFX9-NEXT:    s_setpc_b64 s[30:31]
210  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
211  ret <4 x half> %result
212}
213
; Declarations of the llvm.exp intrinsic overloads used by the tests in this
; file: f32 variants first, then f16 variants.
; NOTE(review): the <3 x half> overload has no active caller here; the only
; function that would use it (v_exp_v3f16) is commented out.
214declare float @llvm.exp.f32(float)
215declare <2 x float> @llvm.exp.v2f32(<2 x float>)
216declare <3 x float> @llvm.exp.v3f32(<3 x float>)
217declare <4 x float> @llvm.exp.v4f32(<4 x float>)
218
219declare half @llvm.exp.f16(half)
220declare <2 x half> @llvm.exp.v2f16(<2 x half>)
221declare <3 x half> @llvm.exp.v3f16(<3 x half>)
222declare <4 x half> @llvm.exp.v4f16(<4 x half>)
223
224