; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; (sext i1 (icmp eq a, b)) == 0 should fold to a single inverted compare;
; no second v_cmp may appear before the folded one.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (icmp ne a, b)) != 0 should fold to the original ne compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (icmp eq a, b)) == -1 should fold to a single eq compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (icmp ne a, b)) != -1 should fold to a single eq compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (icmp eq a, b)) == 0 should fold to a single ne compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (icmp ne a, b)) != 0 should fold to the original ne compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (icmp eq a, b)) == 1 should fold to a single eq compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (icmp ne a, b)) != 1 should fold to a single eq compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zext'd bool can never equal -1, so this reduces to false:
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zext'd bool is never -1, so this reduces to true:
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Compare of a zext'd i8 kernel argument against 255 (i8 max); the constant
; is materialized once and the compare is done on the masked value.
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]

; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A sign-extending byte load compared against -1 uses the sbyte load and a
; direct compare with the -1 inline constant.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Same compare with a signext function argument already in a VGPR.
; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte, and and compare to
; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
; Should do a buffer_load_sbyte and compare with -1

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zext'd i8 can never be -1, so the compare folds to true.
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zext'd bool (0 or 1) is never 2, so ne folds to true.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zext'd bool (0 or 1) is never 2, so eq folds to false.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able fold to true/false in
; DAGCombiner

; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sext'd bool (0 or -1) is never 1, so ne folds to true.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sext'd bool (0 or -1) is never 2, so ne folds to true.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}
