; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

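; sext i1 yields 0 or -1, so (sext %load) == 0 is the inverse of the loaded
; bit and a real compare plus select is emitted.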
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 0, [[TMP]]{{$}}
; SI: v_cndmask_b32_e64
; SI: buffer_store_byte
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: The negate should be inverting the compare.
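; A tighter lowering would fold the xor into an inverted compare, e.g.
; (hypothetical, not what is currently emitted or checked):
;   v_cmp_ne_u32_e32 vcc, 1, [[TMP]]
;   v_cndmask_b32_e64 [[RESULT]], 0, 1, vcc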
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

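; sext i1 only produces 0 or -1, never 1, so the compare folds to false and a
; constant 0 is stored.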
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

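; zext i1 produces 0 or 1, so (zext %load) == 1 is just the loaded bit and the
; compare folds away entirely.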
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

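; sext i1 produces 0 or -1, so (sext %load) == -1 is again just the loaded bit.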
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp eq i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

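; zext i1 can never be -1, so the compare folds to false and a constant 0 is
; stored.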
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp eq i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}


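; (sext %load) != 0 holds exactly when the bit is set, so the compare folds to
; the loaded bit.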
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

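; Likewise (zext %load) != 0 is just the loaded bit.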
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, 0
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

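; sext i1 is never 1, so the compare folds to true and a constant 1 is stored.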
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

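; (zext %load) != 1 is the inverse of the loaded bit; as in
; zextload_i1_to_i32_trunc_cmp_eq_0 above, the inversion is currently done by
; s_xor_b64 on the compare mask rather than by inverting the compare itself.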
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI: v_cmp_eq_u32_e32 vcc, 1, [[TMP]]{{$}}
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, 1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: This should be one compare.
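; The XSI lines below document the desired single-compare lowering, but no XSI
; prefix is passed to FileCheck in the RUN lines, so they are not currently
; checked.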
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; XSI: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
; XSI-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = sext i1 %load to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

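; zext i1 is never -1, so the compare folds to true and a constant 1 is stored.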
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
  %load = load i1, i1 addrspace(1)* %in
  %ext = zext i1 %load to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
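; The and with 255 is a no-op on i8 and folds into the sign-extending load. A
; uniform variant (hypothetical, not part of this test) would drop the gep and
; load straight from the kernel argument:
;   %load = load i8, i8 addrspace(1)* %in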
; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
; SI: {{buffer|flat}}_load_sbyte [[LOAD:v[0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, -1, [[LOAD]]{{$}}
; SI-NEXT: v_cndmask_b32_e64
; SI: {{buffer|flat}}_store_byte
define amdgpu_kernel void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr i8, i8 addrspace(1)* %in, i32 %tid.x
  %load = load i8, i8 addrspace(1)* %in.ptr
  %masked = and i8 %load, 255
  %ext = sext i8 %masked to i32
  %cmp = icmp ne i32 %ext, -1
  store i1 %cmp, i1 addrspace(1)* %out
  ret void
}