1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
3; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
4; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
5; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s
6; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
7
8; These tests check that fdiv is expanded correctly and also test that the
9; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
10; instruction groups.
11
; These tests check fdiv lowering with unsafe FP math, coarse FP division, and IEEE 754 FP division.
13
14; FUNC-LABEL: {{^}}fdiv_f32:
15; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
16; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
17
18; UNSAFE-FP: v_rcp_f32
19; UNSAFE-FP: v_mul_f32_e32
20
21; SI-DAG: v_rcp_f32
22; SI-DAG: v_mul_f32
23
24; I754-DAG: v_div_scale_f32
25; I754-DAG: v_rcp_f32
26; I754-DAG: v_fma_f32
27; I754-DAG: v_mul_f32
28; I754-DAG: v_fma_f32
29; I754-DAG: v_div_fixup_f32
; Scalar case, no fast-math flags: divide the float argument %a by %b and
; store the quotient through the addrspace(1) pointer %out.
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}
36
37; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
38; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
39; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
40
41; UNSAFE-FP: v_rcp_f32
42; UNSAFE-FP: v_mul_f32_e32
43
44; SI-DAG: v_rcp_f32
45; SI-DAG: v_mul_f32
; Scalar case with the `fast` flag on the fdiv: divide the float argument %a
; by %b and store the quotient through the addrspace(1) pointer %out.
define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv fast float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}
52
53; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
54; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
55; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
56
57; UNSAFE-FP: v_rcp_f32
58; UNSAFE-FP: v_mul_f32_e32
59
60; SI-DAG: v_rcp_f32
61; SI-DAG: v_mul_f32
; Scalar case with the `arcp` flag on the fdiv: divide the float argument %a
; by %b and store the quotient through the addrspace(1) pointer %out.
define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv arcp float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}
68
69; FUNC-LABEL: {{^}}fdiv_v2f32:
70; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
71; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
72; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
73; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
74
75; UNSAFE-FP: v_rcp_f32
76; UNSAFE-FP: v_rcp_f32
77; UNSAFE-FP: v_mul_f32_e32
78; UNSAFE-FP: v_mul_f32_e32
79
80; SI-DAG: v_rcp_f32
81; SI-DAG: v_mul_f32
82; SI-DAG: v_rcp_f32
83; SI-DAG: v_mul_f32
84
85; I754: v_div_scale_f32
86; I754: v_div_scale_f32
87; I754: v_div_scale_f32
88; I754: v_div_scale_f32
89; I754: v_div_fixup_f32
90; I754: v_div_fixup_f32
; Two-element vector case, no fast-math flags: divide the <2 x float>
; argument %a by %b element-wise and store the result through %out.
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}
97
98; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
99; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
100; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
101; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
102; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
103
104; UNSAFE-FP: v_rcp_f32
105; UNSAFE-FP: v_rcp_f32
106; UNSAFE-FP: v_mul_f32_e32
107; UNSAFE-FP: v_mul_f32_e32
108
109; SI-DAG: v_rcp_f32
110; SI-DAG: v_mul_f32
111; SI-DAG: v_rcp_f32
112; SI-DAG: v_mul_f32
; Two-element vector case with the `fast` flag on the fdiv: divide the
; <2 x float> argument %a by %b element-wise and store the result through %out.
define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv fast <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}
119
120; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
121; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
122; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
123; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
124; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
125
126; UNSAFE-FP: v_rcp_f32
127; UNSAFE-FP: v_rcp_f32
128; UNSAFE-FP: v_mul_f32_e32
129; UNSAFE-FP: v_mul_f32_e32
130
131; SI-DAG: v_rcp_f32
132; SI-DAG: v_mul_f32
133; SI-DAG: v_rcp_f32
134; SI-DAG: v_mul_f32
; Two-element vector case with the `arcp` flag on the fdiv: divide the
; <2 x float> argument %a by %b element-wise and store the result through %out.
define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv arcp <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}
141
142; FUNC-LABEL: {{^}}fdiv_v4f32:
143; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
144; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
145; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
146; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
147; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
148; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
149; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
150; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
151
152; UNSAFE-FP: v_rcp_f32_e32
153; UNSAFE-FP: v_rcp_f32_e32
154; UNSAFE-FP: v_rcp_f32_e32
155; UNSAFE-FP: v_rcp_f32_e32
156; UNSAFE-FP: v_mul_f32_e32
157; UNSAFE-FP: v_mul_f32_e32
158; UNSAFE-FP: v_mul_f32_e32
159; UNSAFE-FP: v_mul_f32_e32
160
161; SI-DAG: v_rcp_f32
162; SI-DAG: v_mul_f32
163; SI-DAG: v_rcp_f32
164; SI-DAG: v_mul_f32
165; SI-DAG: v_rcp_f32
166; SI-DAG: v_mul_f32
167; SI-DAG: v_rcp_f32
168; SI-DAG: v_mul_f32
169
170; I754: v_div_scale_f32
171; I754: v_div_scale_f32
172; I754: v_div_scale_f32
173; I754: v_div_scale_f32
174; I754: v_div_scale_f32
175; I754: v_div_scale_f32
176; I754: v_div_scale_f32
177; I754: v_div_scale_f32
178; I754: v_div_fixup_f32
179; I754: v_div_fixup_f32
180; I754: v_div_fixup_f32
181; I754: v_div_fixup_f32
; Four-element vector case, no fast-math flags. Unlike the scalar and v2
; variants, the operands are loaded from memory: the dividend is the
; <4 x float> at %in and the divisor is the <4 x float> at index 1 past %in.
; The element-wise quotient is stored through %out.
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %rhs.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %lhs = load <4 x float>, <4 x float> addrspace(1)* %in
  %rhs = load <4 x float>, <4 x float> addrspace(1)* %rhs.ptr
  %quot = fdiv <4 x float> %lhs, %rhs
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}
190
191; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
192; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
193; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
194; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
195; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
196; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
197; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
198; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
199; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
200
201; UNSAFE-FP: v_rcp_f32_e32
202; UNSAFE-FP: v_rcp_f32_e32
203; UNSAFE-FP: v_rcp_f32_e32
204; UNSAFE-FP: v_rcp_f32_e32
205; UNSAFE-FP: v_mul_f32_e32
206; UNSAFE-FP: v_mul_f32_e32
207; UNSAFE-FP: v_mul_f32_e32
208; UNSAFE-FP: v_mul_f32_e32
209
210; SI-DAG: v_rcp_f32
211; SI-DAG: v_mul_f32
212; SI-DAG: v_rcp_f32
213; SI-DAG: v_mul_f32
214; SI-DAG: v_rcp_f32
215; SI-DAG: v_mul_f32
216; SI-DAG: v_rcp_f32
217; SI-DAG: v_mul_f32
; Four-element vector case with the `fast` flag on the fdiv. The dividend is
; the <4 x float> loaded from %in and the divisor is the <4 x float> loaded
; from index 1 past %in; the element-wise quotient is stored through %out.
define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %rhs.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %lhs = load <4 x float>, <4 x float> addrspace(1)* %in
  %rhs = load <4 x float>, <4 x float> addrspace(1)* %rhs.ptr
  %quot = fdiv fast <4 x float> %lhs, %rhs
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}
226
227; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
228; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
229; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
230; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
231; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
232; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
233; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
234; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
235; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
236
237; UNSAFE-FP: v_rcp_f32_e32
238; UNSAFE-FP: v_rcp_f32_e32
239; UNSAFE-FP: v_rcp_f32_e32
240; UNSAFE-FP: v_rcp_f32_e32
241; UNSAFE-FP: v_mul_f32_e32
242; UNSAFE-FP: v_mul_f32_e32
243; UNSAFE-FP: v_mul_f32_e32
244; UNSAFE-FP: v_mul_f32_e32
245
246; SI-DAG: v_rcp_f32
247; SI-DAG: v_mul_f32
248; SI-DAG: v_rcp_f32
249; SI-DAG: v_mul_f32
250; SI-DAG: v_rcp_f32
251; SI-DAG: v_mul_f32
252; SI-DAG: v_rcp_f32
253; SI-DAG: v_mul_f32
; Four-element vector case with the `arcp` flag on the fdiv. The dividend is
; the <4 x float> loaded from %in and the divisor is the <4 x float> loaded
; from index 1 past %in; the element-wise quotient is stored through %out.
define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %rhs.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %lhs = load <4 x float>, <4 x float> addrspace(1)* %in
  %rhs = load <4 x float>, <4 x float> addrspace(1)* %rhs.ptr
  %quot = fdiv arcp <4 x float> %lhs, %rhs
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}
262