; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Test expansion of scalar selects on vectors.
; Evergreen is not enabled because it appears to have problems with doubles.


; Picks one of two <4 x i8> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match four
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v4i8:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
  %cmp = icmp eq i8 %c, 0
  %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; Picks one of two <4 x i16> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match four
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v4i16:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4
  ret void
}

; FIXME: Expansion with bitwise operations may be better if doing a
; vector select with SGPR inputs.

; Picks one of two <2 x i32> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out with 8-byte alignment. The checks
; below match two v_cndmask_b32_e32 followed by one buffer_store_dwordx2.
; FUNC-LABEL: {{^}}s_select_v2i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; Picks one of two <4 x i32> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match four
; v_cndmask_b32_e32 followed by one buffer_store_dwordx4.
; FUNC-LABEL: {{^}}s_select_v4i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx4
define void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Loads a <4 x i32> from %in and keeps it when %cond is unsigned-less-than 32;
; otherwise the result is zeroinitializer. The result is stored to %out.
; The checks below expect the compare to be emitted as v_cmp_gt_u32_e64 with
; the constant 32 as its first source operand, and four v_cndmask_b32_e32
; that each take a literal 0 operand, between the dwordx4 load and store.
; FUNC-LABEL: {{^}}v_select_v4i32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
bb:
  %tmp2 = icmp ult i32 %cond, 32
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %tmp3 = select i1 %tmp2, <4 x i32> %val, <4 x i32> zeroinitializer
  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <8 x i32> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match eight
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v8i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <2 x float> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out.
; The checks below expect, in any order: an s_load_dwordx2 for each of the two
; vector arguments, a v_mov_b32_e32 copying each loaded SGPR half into a VGPR,
; and a v_cmp_eq_i32_e64 against 0 writing vcc. After those come two
; v_cndmask_b32_e32 and one buffer_store_dwordx2.
; FUNC-LABEL: {{^}}s_select_v2f32:
; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}

; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x float> %a, <2 x float> %b
  ; <2 x float> occupies 8 bytes, so align 8 is its natural alignment; this
  ; matches the <2 x i32> store in s_select_v2i32 and avoids asserting a
  ; 16-byte alignment that nothing in the signature guarantees.
  store <2 x float> %select, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; Picks one of two <4 x float> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below expect two
; s_load_dwordx4, a v_cmp_eq_i32_e64 against 0 writing vcc, four
; v_cndmask_b32_e32, and finally one buffer_store_dwordx4.
; FUNC-LABEL: {{^}}s_select_v4f32:
; SI: s_load_dwordx4
; SI: s_load_dwordx4
; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32

; SI: buffer_store_dwordx4
define void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x float> %a, <4 x float> %b
  store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Loads a <4 x float> from %in and keeps it when %cond is unsigned-less-than
; 32; otherwise the result is zeroinitializer. The result is stored to %out.
; The checks below expect the compare to be emitted as v_cmp_gt_u32_e64 with
; the constant 32 as its first source operand, and four v_cndmask_b32_e32
; that each take a literal 0 operand, between the dwordx4 load and store.
; FUNC-LABEL: {{^}}v_select_v4f32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
bb:
  %tmp2 = icmp ult i32 %cond, 32
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %tmp3 = select i1 %tmp2, <4 x float> %val, <4 x float> zeroinitializer
  store <4 x float> %tmp3, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <8 x float> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match eight
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v8f32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x float> %a, <8 x float> %b
  store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <2 x double> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match four
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v2f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x double> %a, <2 x double> %b
  store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <4 x double> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match eight
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v4f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x double> %a, <4 x double> %b
  store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16
  ret void
}

; Picks one of two <8 x double> arguments with a single i1 condition (%c == 0)
; and stores the chosen vector to %out. The checks below match sixteen
; v_cndmask_b32_e32 instructions in sequence.
; FUNC-LABEL: {{^}}select_v8f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x double> %a, <8 x double> %b
  store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16
  ret void
}

; NOTE(review): no function visible in this file calls this intrinsic; confirm
; whether the declaration (and attribute group #1) is still needed.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is applied to v_select_v4i32 and v_select_v4f32; #1 is applied to the
; intrinsic declaration above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
