1; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
; Intrinsic declarations used by the kernels below.

; llvm.canonicalize overloads: scalars first, then vectors.
declare half @llvm.canonicalize.f16(half) #0
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0

; llvm.fabs overloads.
declare float @llvm.fabs.f32(float) #0
declare double @llvm.fabs.f64(double) #0

; Work-item id (x dimension).
declare i32 @llvm.amdgcn.workitem.id.x() #0
11
; canonicalize of an f32 that is loaded from %out and written back to it.
; The checks expect a v_mul_f32 by 1.0 whose destination is the stored register.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
  %x = load float, float addrspace(1)* %out
  %canon = call float @llvm.canonicalize.f32(float %x)
  store float %canon, float addrspace(1)* %out
  ret void
}
21
; canonicalize of an f32 passed directly as a kernel argument.
; The checks expect the e64 multiply by 1.0 with an s-register source operand.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
  %canon = call float @llvm.canonicalize.f32(float %val)
  store float %canon, float addrspace(1)* %out
  ret void
}
30
; canonicalize(fabs(x)) for a loaded f32.
; The checks expect the absolute value to appear as a |...| source modifier
; on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
  %x = load float, float addrspace(1)* %out
  %abs = call float @llvm.fabs.f32(float %x)
  %canon = call float @llvm.canonicalize.f32(float %abs)
  store float %canon, float addrspace(1)* %out
  ret void
}
41
; canonicalize(-fabs(x)) for a loaded f32; the negation is written as
; (fsub -0.0, ...). The checks expect a multiply by -1.0 with a |...| source
; modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
  %x = load float, float addrspace(1)* %out
  %abs = call float @llvm.fabs.f32(float %x)
  %neg.abs = fsub float -0.0, %abs
  %canon = call float @llvm.canonicalize.f32(float %neg.abs)
  store float %canon, float addrspace(1)* %out
  ret void
}
53
; canonicalize(-x) for a loaded f32; the negation is written as (fsub -0.0, x).
; The checks expect it to be absorbed into a multiply by -1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
  %x = load float, float addrspace(1)* %out
  %neg = fsub float -0.0, %x
  %canon = call float @llvm.canonicalize.f32(float %neg)
  store float %canon, float addrspace(1)* %out
  ret void
}
64
; canonicalize of undef. The checks expect the stored value to be the
; immediate 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float undef)
  store float %folded, float addrspace(1)* %out
  ret void
}
73
; canonicalize of the constant +0.0. The checks expect a plain move of the
; immediate 0 into the stored register.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float 0.0)
  store float %folded, float addrspace(1)* %out
  ret void
}
82
; canonicalize of the constant -0.0. The checks expect the value to be
; materialized with v_bfrev_b32 of 1.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float -0.0)
  store float %folded, float addrspace(1)* %out
  ret void
}
91
; canonicalize of the constant 1.0. The checks expect a move of the
; immediate 1.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float 1.0)
  store float %folded, float addrspace(1)* %out
  ret void
}
100
; canonicalize of the constant -1.0. The checks expect a move of the
; immediate -1.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float -1.0)
  store float %folded, float addrspace(1)* %out
  ret void
}
109
; canonicalize of the constant 16.0. The checks expect a move of the
; literal 0x41800000.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float 16.0)
  store float %folded, float addrspace(1)* %out
  ret void
}
118
; canonicalize of the bit pattern 0x007FFFFF (8388607) under attribute
; group #1. The checks expect the stored value to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
127
; canonicalize of the bit pattern 0x007FFFFF (8388607) under attribute
; group #3, which enables the denormal target features. The checks expect the
; same bit pattern to be stored.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
136
; canonicalize of the bit pattern 0x807FFFFF (2155872255) under attribute
; group #1. The checks expect the stored value to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
145
; canonicalize of the bit pattern 0x807FFFFF (2155872255) under attribute
; group #3, which enables the denormal target features. The checks expect the
; same bit pattern to be stored.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
154
; canonicalize of the float constant written as 0x7FF8000000000000.
; The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %folded, float addrspace(1)* %out
  ret void
}
163
; canonicalize of the all-ones bit pattern (i32 -1) reinterpreted as float.
; The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
172
; canonicalize of the bit pattern of i32 -2 (0xFFFFFFFE) reinterpreted as
; float. The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
181
; canonicalize of the bit pattern 0x7F800001 (2139095041) reinterpreted as
; float. The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
190
; canonicalize of the bit pattern 0x7FBFFFFF (2143289343) reinterpreted as
; float. The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
199
; canonicalize of the bit pattern 0xFF800001 (4286578689) reinterpreted as
; float. The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
208
; canonicalize of the bit pattern 0xFFBFFFFF (4290772991) reinterpreted as
; float. The checks expect 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
  %folded = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
  store float %folded, float addrspace(1)* %out
  ret void
}
217
; canonicalize of an f64 that is loaded from %out and written back to it.
; The checks expect a v_max_f64 on v-register pairs whose destination pair is
; the one stored.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
  %x = load double, double addrspace(1)* %out
  %canon = call double @llvm.canonicalize.f64(double %x)
  store double %canon, double addrspace(1)* %out
  ret void
}
227
; canonicalize of an f64 passed directly as a kernel argument.
; The checks expect a v_max_f64 whose two sources are s-register pairs.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
  %canon = call double @llvm.canonicalize.f64(double %val)
  store double %canon, double addrspace(1)* %out
  ret void
}
236
; canonicalize(fabs(x)) for a loaded f64.
; The checks expect the |...| source modifier on both v_max_f64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
  %x = load double, double addrspace(1)* %out
  %abs = call double @llvm.fabs.f64(double %x)
  %canon = call double @llvm.canonicalize.f64(double %abs)
  store double %canon, double addrspace(1)* %out
  ret void
}
247
; canonicalize(-fabs(x)) for a loaded f64; the negation is written as
; (fsub -0.0, ...). The checks expect the -|...| source modifiers on both
; v_max_f64 operands and the destination pair to be the one stored.
; The destination capture accepts multi-digit register numbers on both sides
; of the colon, like the other f64 tests in this file.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fabs = call double @llvm.fabs.f64(double %val)
  %val.fabs.fneg = fsub double -0.0, %val.fabs
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
259
; canonicalize(-x) for a loaded f64; the negation is written as (fsub -0.0, x).
; The checks expect a negate source modifier on both v_max_f64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
  %x = load double, double addrspace(1)* %out
  %neg = fsub double -0.0, %x
  %canon = call double @llvm.canonicalize.f64(double %neg)
  store double %canon, double addrspace(1)* %out
  ret void
}
270
; canonicalize of the f64 constant +0.0. The checks expect the low half to be
; set to 0 and the high half to be copied from the low half before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double 0.0)
  store double %folded, double addrspace(1)* %out
  ret void
}
280
; canonicalize of the f64 constant -0.0. The checks expect a low half of 0 and
; a high half produced by v_bfrev_b32 of 1, in either order.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double -0.0)
  store double %folded, double addrspace(1)* %out
  ret void
}
290
; canonicalize of the f64 constant 1.0. The checks expect a low half of 0 and
; a high half of 0x3ff00000, in either order.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double 1.0)
  store double %folded, double addrspace(1)* %out
  ret void
}
300
; canonicalize of the f64 constant -1.0. The checks expect a low half of 0 and
; a high half of 0xbff00000, in either order.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double -1.0)
  store double %folded, double addrspace(1)* %out
  ret void
}
310
; canonicalize of the f64 constant 16.0. The checks expect a low half of 0 and
; a high half of 0x40300000, in either order.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double 16.0)
  store double %folded, double addrspace(1)* %out
  ret void
}
320
; canonicalize of the bit pattern 0x000FFFFFFFFFFFFF (4503599627370495) under
; attribute group #2, which disables the denormal target features. The checks
; expect both halves of the stored value to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
330
; canonicalize of the bit pattern 0x000FFFFFFFFFFFFF (4503599627370495) under
; attribute group #3, which enables the denormal target features. The checks
; expect the same bit pattern: low half -1, high half 0xfffff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
340
; canonicalize of the bit pattern 0x800FFFFFFFFFFFFF (9227875636482146303)
; under attribute group #2, which disables the denormal target features. The
; checks expect both halves of the stored value to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
350
; canonicalize of the bit pattern 0x800FFFFFFFFFFFFF (9227875636482146303)
; under attribute group #3, which enables the denormal target features. The
; checks expect the same bit pattern: low half -1, high half 0x800fffff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
360
; canonicalize of the f64 constant 0x7FF8000000000000. The checks expect the
; same value to be stored: high half 0x7ff80000, low half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
  store double %folded, double addrspace(1)* %out
  ret void
}
370
; canonicalize of the all-ones bit pattern (i64 -1) reinterpreted as double.
; The checks expect high half 0x7ff80000 and low half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
380
; canonicalize of the bit pattern of i64 -2 reinterpreted as double.
; The checks expect high half 0x7ff80000 and low half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
390
; canonicalize of the bit pattern 0x7FF0000000000001 (9218868437227405313)
; reinterpreted as double. The checks expect high half 0x7ff80000 and low
; half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
400
; canonicalize of a positive signaling NaN with every mantissa bit set except
; the quiet bit: 0x7FF7FFFFFFFFFFFF (9221120237041090559). This mirrors the
; f32 snan1 test, which uses 0x7FBFFFFF. The previous constant,
; 0x7FFFFFFFFFFFFFFF, has the quiet bit set and so was not a signaling NaN.
; The checks expect high half 0x7ff80000 and low half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9221120237041090559 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
410
; canonicalize of the bit pattern 0xFFF0000000000001 (18442240474082181121)
; reinterpreted as double. The checks expect high half 0x7ff80000 and low
; half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
  %folded = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
  store double %folded, double addrspace(1)* %out
  ret void
}
420
; canonicalize of a negative signaling NaN with every mantissa bit set except
; the quiet bit: 0xFFF7FFFFFFFFFFFF (18444492273895866367). This mirrors the
; f32 snan3 test, which uses 0xFFBFFFFF. The previous constant was the
; all-ones pattern, which has the quiet bit set and is the same input as the
; qnan_value_neg1 f64 test.
; The checks expect high half 0x7ff80000 and low half 0 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18444492273895866367 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
430
; Per-work-item canonicalize of an f64 under attribute group #4 (denormal
; target features disabled, target-cpu tonga). The checks expect a v_mul_f64
; by 1.0.
; GCN-LABEL:  {{^}}test_canonicalize_value_f64_flush:
; GCN: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
  %x = load double, double addrspace(1)* %src, align 8
  %canon = tail call double @llvm.canonicalize.f64(double %x)
  %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
  store double %canon, double addrspace(1)* %dst, align 8
  ret void
}
442
; Per-work-item canonicalize of an f32 under attribute group #4 (denormal
; target features disabled, target-cpu tonga). The checks expect a v_mul_f32
; by 1.0.
; GCN-LABEL:  {{^}}test_canonicalize_value_f32_flush:
; GCN: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %x = load float, float addrspace(1)* %src, align 4
  %canon = tail call float @llvm.canonicalize.f32(float %x)
  %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
  store float %canon, float addrspace(1)* %dst, align 4
  ret void
}
454
; Per-work-item canonicalize of an f16 under attribute group #4 (denormal
; target features disabled, target-cpu tonga). The checks expect a v_mul_f16
; by 1.0.
; GCN-LABEL:  {{^}}test_canonicalize_value_f16_flush:
; GCN: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
  %x = load half, half addrspace(1)* %src, align 2
  %canon = tail call half @llvm.canonicalize.f16(half %x)
  %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
  store half %canon, half addrspace(1)* %dst, align 2
  ret void
}
466
; Per-work-item canonicalize of a <2 x half> under attribute group #4
; (denormal target features disabled, target-cpu tonga). The checks expect
; the constant 0x3c00 to be materialized, then two multiplies in either order:
; an SDWA v_mul_f16 reading WORD_1 of the source and a plain v_mul_f16 by 1.0.
; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_flush_gfx8:
; GCN:     v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
; GCN-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GCN-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}
480
; Per-work-item canonicalize of a <2 x half> under attribute group #6
; (denormal target features disabled, target-cpu gfx900). The checks expect a
; single packed v_pk_mul_f16 by 1.0.
; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_flush_gfx9:
; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}
492
; Per-work-item canonicalize of an f64 under attribute group #5 (denormal
; target features enabled, target-cpu gfx900). The checks expect a v_max_f64.
; GCN-LABEL:  {{^}}test_canonicalize_value_f64_denorm:
; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
  %x = load double, double addrspace(1)* %src, align 8
  %canon = tail call double @llvm.canonicalize.f64(double %x)
  %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
  store double %canon, double addrspace(1)* %dst, align 8
  ret void
}
504
; Per-work-item canonicalize of an f32 under attribute group #5 (denormal
; target features enabled, target-cpu gfx900). The checks expect a v_max_f32.
; GCN-LABEL:  {{^}}test_canonicalize_value_f32_denorm:
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %x = load float, float addrspace(1)* %src, align 4
  %canon = tail call float @llvm.canonicalize.f32(float %x)
  %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
  store float %canon, float addrspace(1)* %dst, align 4
  ret void
}
516
; Per-work-item canonicalize of an f16 under attribute group #5 (denormal
; target features enabled, target-cpu gfx900). The checks expect a v_max_f16.
; GCN-LABEL:  {{^}}test_canonicalize_value_f16_denorm:
; GCN: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
  %x = load half, half addrspace(1)* %src, align 2
  %canon = tail call half @llvm.canonicalize.f16(half %x)
  %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
  store half %canon, half addrspace(1)* %dst, align 2
  ret void
}
528
; Per-work-item canonicalize of a <2 x half> under attribute group #5
; (denormal target features enabled, target-cpu gfx900). The checks expect a
; packed v_pk_max_f16.
; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_denorm:
; GCN: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}
540
; canonicalize of a <2 x double> loaded at the work-item's offset from %out;
; the result is stored to %out itself. The checks expect two v_max_f64
; instructions, one per element.
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f64:
; GCN: v_max_f64
; GCN: v_max_f64
define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(1)* %out) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr <2 x double>, <2 x double> addrspace(1)* %out, i32 %lane
  %vec = load <2 x double>, <2 x double> addrspace(1)* %src
  %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %vec)
  store <2 x double> %canon, <2 x double> addrspace(1)* %out
  ret void
}
552
; Attribute groups.
; #0: used by the intrinsic declarations.
attributes #0 = { nounwind readnone }
; #1: no explicit target features or target CPU.
attributes #1 = { nounwind }
; #2 / #3: denormal target features explicitly disabled / enabled.
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
; #4: tonga with the denormal features disabled.
attributes #4 = { nounwind "target-cpu"="tonga" "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
; #5 / #6: gfx900 with the denormal features enabled / disabled.
attributes #5 = { nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
attributes #6 = { nounwind "target-cpu"="gfx900" "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
560