; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s

; Codegen tests for the llvm.maxnum intrinsic on the r600 target (-mcpu=cypress).
;
; Every kernel below stores its result through %out. The checks first capture
; the register named on the MEM_RAT_CACHELESS STORE_RAW line and then look for
; the instruction expected to produce that register. FileCheck is invoked with
; -enable-var-scope, and each function re-captures its own variables after its
; label line.

; Scalar maxnum of two kernel arguments: expects a MAX_DX10 line that mentions
; the stored register.
; EG-LABEL: {{^}}test_fmax_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float %b)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; <2 x float> maxnum: expects two MAX_DX10 lines mentioning the stored
; register (one per element).
; EG-LABEL: {{^}}test_fmax_v2f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; <4 x float> maxnum: expects four MAX_DX10 lines mentioning the stored
; register (one per element).
; EG-LABEL: {{^}}test_fmax_v4f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float> maxnum: expects two stores and, in any order, a MAX_DX10 for
; each of the X/Y/Z/W channels of both stored registers (eight in total).
; EG-LABEL: {{^}}test_fmax_v8f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; <16 x float> maxnum: expects four stores and, in any order, a MAX_DX10 for
; each of the X/Y/Z/W channels of all four stored registers (sixteen in total).
; EG-LABEL: {{^}}test_fmax_v16f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; The constant_fold_* tests call maxnum with two constant operands. Each one
; expects that no MAX_DX10 appears between the store and a MOV that loads a
; literal into the stored register, i.e. the call was folded away.

; maxnum(1.0, 2.0)
; EG-LABEL: {{^}}constant_fold_fmax_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(NaN, NaN). Besides the folded MOV, the literal itself is checked:
; 2143289344 is 0x7FC00000, printed with a "(nan)" annotation.
; EG-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344(nan)
define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(1.0, NaN). Only the fold is checked, not the resulting literal value.
; EG-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(NaN, 1.0). Only the fold is checked, not the resulting literal value.
; EG-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; The next four tests cover the signed-zero combinations. They only verify
; that the call is folded; the sign of the resulting zero is not checked.

; maxnum(+0.0, +0.0)
; EG-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(+0.0, -0.0)
; EG-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(-0.0, +0.0)
; EG-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(-0.0, -0.0)
; EG-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; The remaining tests mix one kernel argument with one constant, in both
; operand orders, and expect the MAX_DX10 to be kept.

; maxnum(%a, 2.0). The '*' in the pattern sits outside any regex block, so it
; is matched as a literal character immediately before the stored register.
; EG-LABEL: {{^}}fmax_var_immediate_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 * [[OUT]]
define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(2.0, %a). The KC0[...] operand is expected first and the literal
; second, even though the constant comes first in the IR.
; EG-LABEL: {{^}}fmax_immediate_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\]\.[XYZW]}}, literal.{{[xy]}}
define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float 2.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(%a, 99.0). Expects the KC0[...] operand first and the literal second.
; EG-LABEL: {{^}}fmax_var_literal_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\]\.[XYZW]}}, literal.{{[xy]}}
define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 99.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(99.0, %a). The KC0[...] operand is expected first and the literal
; second, even though the constant comes first in the IR.
; EG-LABEL: {{^}}fmax_literal_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\]\.[XYZW]}}, literal.{{[xy]}}
define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float 99.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Intrinsic declarations used by the kernels above.
declare float @llvm.maxnum.f32(float, float) #1
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
; NOTE(review): the f64 variant is declared but not called by any function
; visible in this file, and it carries no attribute group.
declare double @llvm.maxnum.f64(double, double)

; #0 is applied to every kernel definition; #1 to the f32 intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }