; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math  -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: The no-NaNs run should use a no-signed-zeros option instead of
; -enable-unsafe-fp-math.

; Intrinsic whose i32 result the buffer-based tests below use as the element
; index into their %in pointer.
declare i32 @llvm.r600.read.tidig.x() #1

; Scalar min pattern on two elements extracted from an inreg vector argument:
; %r1 is selected when "%r0 uge %r1" holds (which includes the unordered
; case), %r0 otherwise. The default run expects the legacy min instruction;
; the run with no-NaNs/unsafe math enabled expects the plain min.
; FUNC-LABEL: {{^}}test_fmin_legacy_f32:
; EG: MIN *
; SI-SAFE: v_min_legacy_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
   %r0 = extractelement <4 x float> %reg0, i32 0
   %r1 = extractelement <4 x float> %reg0, i32 1
   %r2 = fcmp uge float %r0, %r1
   %r3 = select i1 %r2, float %r1, float %r0
   ; Only lane 0 of the stored vector is defined; the rest stays undef.
   %vec = insertelement <4 x float> undef, float %r3, i32 0
   store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16
   ret void
}

; select(a ule b, a, b) on two adjacent floats loaded from %in at the index
; returned by the tidig.x intrinsic. A is the load at offset 0, B the load at
; offset 4. Both SI runs expect the min operands in B, A order.
; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; select(a ole b, a, b) on two adjacent floats loaded from %in. The label is
; anchored so it cannot also match the _multi_use variant further down.
; The expected operand order differs between the two SI runs: A, B for the
; legacy min and B, A for the plain min.
; NOTE(review): presumably the legacy instruction is order-sensitive for NaN
; inputs -- confirm against the ISA documentation.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; select(a olt b, a, b) on two adjacent floats loaded from %in. As in the
; ole test, the legacy min is expected with operands A, B and the plain min
; with operands B, A.
; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; select(a ult b, a, b) on two adjacent floats loaded from %in. Both SI runs
; expect the min operands in B, A order, matching the ule test.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same ult pattern as the scalar test, but on <1 x float> values. The
; expected SI output is identical to the scalar case: two dword loads and a
; single min with operands B, A.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}

; ult min pattern on <2 x float>: two dwordx2 loads are expected, followed by
; one min instruction per element (two in total) in each SI run. Operand
; order is not checked here.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; SI: buffer_load_dwordx2
; SI: buffer_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1

  %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
  %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <2 x float> %a, %b
  %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}

; ult min pattern on <3 x float>: three min instructions (one per element)
; are expected in each SI run. Neither the loads nor the operand order are
; checked here.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

; Negative test: the compare result has a second use (it is also stored to
; %out1), so no min instruction is expected anywhere before s_endpgm. The
; checks instead require a v_cmp_le_f32 immediately followed by a
; v_cndmask_b32.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-NOT: v_min
; SI: v_cmp_le_f32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; SI: s_endpgm
define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
  store float %val0, float addrspace(1)* %out0, align 4
  ; Second use of %cmp: this store is what makes the test "multi use".
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; Group #0 is applied to every test function above; group #1 is applied to
; the tidig.x intrinsic declaration and to each call of it.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }