; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure to test with f32 and i32 compares. If we have to use float
; compares, we always have multiple condition registers. If we can do
; scalar compares, we don't want to use multiple condition registers.

; i32 select whose condition is the AND of two i32 `icmp ne` results.
; The checks expect both compares as v_cmp_ne_u32 (one writing vcc, the
; other an SGPR pair), the two masks combined into vcc with s_and_b64, and a
; single v_cndmask_b32 whose result register is not mentioned again until
; it is stored with buffer_store_dword.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i32 select whose condition is the AND of two f32 `fcmp one` results.
; Same expected shape as the integer variant, but the compares are
; v_cmp_lg_f32: one writes vcc, the other an SGPR pair, s_and_b64 merges
; them into vcc, and one v_cndmask_b32 produces the stored value.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i64 select whose condition is the AND of two i32 `icmp ne` results.
; The checks expect the two v_cmp_ne_u32 masks merged into vcc by
; s_and_b64, then two v_cndmask_b32 instructions (the register used as the
; upper end of the stored pair is matched first) and a buffer_store_dwordx2
; of the resulting register pair.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i64 select whose condition is the AND of two f32 `fcmp one` results.
; The checks expect two v_cmp_lg_f32 masks merged into vcc by s_and_b64,
; two v_cndmask_b32 instructions reading vcc, and a buffer_store_dwordx2 of
; the resulting register pair.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i32 select whose condition is the OR of two i32 `icmp ne` results.
; The checks expect both compares as v_cmp_ne_u32 (one writing vcc, the
; other an SGPR pair), the masks combined into vcc with s_or_b64, a single
; v_cndmask_b32 whose result is not mentioned again until it is stored, and
; then s_endpgm.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i32 select whose condition is the OR of two f32 `fcmp one` results.
; The checks expect two v_cmp_lg_f32 compares (one writing vcc, the other
; an SGPR pair), merged into vcc with s_or_b64, and one v_cndmask_b32 whose
; result is not mentioned again until it is stored.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i64 select whose condition is the OR of two i32 `icmp ne` results.
; The checks expect the two v_cmp_ne_u32 masks merged into vcc by s_or_b64,
; two v_cndmask_b32 instructions reading vcc, and a buffer_store_dwordx2 of
; the resulting register pair.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i64 select whose condition is the OR of two f32 `fcmp one` results.
; The checks expect two v_cmp_lg_f32 masks merged into vcc by s_or_b64, two
; v_cndmask_b32 instructions reading vcc, and a buffer_store_dwordx2 of the
; resulting register pair.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; Control-flow case: an i1 phi fed by a constant true, a constant false and
; the negation of a float compare drives a float select. The checks only
; require three compares, in order: v_cmp_neq_f32 against 1.0,
; v_cmp_neq_f32 against 0, then v_cmp_eq_f32 against 0, each writing an SGPR
; pair. NOTE(review): the bug this guards against is not described in this
; file.
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0

define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry:
  %cmp0 = fcmp oeq float %c0, 1.0
  br i1 %cmp0, label %if0, label %endif

if0:
  %cmp1 = fcmp oeq float %c1, 0.0
  br i1 %cmp1, label %if1, label %endif

if1:
  ; Only reached when %cmp1 is true, so this is the negation of a known-true
  ; value.
  %cmp2 = xor i1 %cmp1, true
  br label %endif

endif:
  ; Merge the condition from all three predecessors before selecting.
  %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
  %tmp2 = select i1 %tmp0, float 4.0, float 0.0
  store float %tmp2, float addrspace(1)* %out
  ret void
}

; Attribute group #0, referenced by every kernel defined above.
attributes #0 = { nounwind }