1;RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
2;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=FUNC %s
3;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
4
5; FUNC-LABEL: {{^}}test_select_v2i32:
6
7; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
8; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
9
10; VI: s_cmp_gt_i32
11; VI: s_cselect_b32
12; VI: s_cmp_gt_i32
13; VI: s_cselect_b32
14
15; SI: v_cmp_gt_i32_e32 vcc
16; SI: v_cndmask_b32_e32
17; SI: v_cmp_gt_i32_e32 vcc
18; SI: v_cndmask_b32_e32
19
; Two-lane integer vector select.
; Loads a <2 x i32> from each of %in0 and %in1, compares them lane by lane with
; a signed greater-than, and stores a vector that takes %val in the lanes where
; the compare holds and the %in0 data in the remaining lanes.
;   %out - addrspace(1) pointer that receives the selected vector
;   %in0 - addrspace(1) pointer to the compare LHS (also the fallback operand)
;   %in1 - addrspace(1) pointer to the compare RHS
;   %val - by-value vector used for the lanes where the compare is true
define amdgpu_kernel void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1, <2 x i32> %val) {
entry:
  %lhs = load <2 x i32>, <2 x i32> addrspace(1)* %in0
  %rhs = load <2 x i32>, <2 x i32> addrspace(1)* %in1
  ; Per-lane i1 mask: set where %lhs is greater than %rhs (signed).
  %is_gt = icmp sgt <2 x i32> %lhs, %rhs
  ; Set lanes pick the kernel argument; clear lanes pass %lhs through.
  %picked = select <2 x i1> %is_gt, <2 x i32> %val, <2 x i32> %lhs
  store <2 x i32> %picked, <2 x i32> addrspace(1)* %out
  ret void
}
29
30; FUNC-LABEL: {{^}}test_select_v2f32:
31
32; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
34
35; SI: v_cmp_neq_f32_e32 vcc
36; SI: v_cndmask_b32_e32
37; SI: v_cmp_neq_f32_e32 vcc
38; SI: v_cndmask_b32_e32
39
; Two-lane float vector select.
; Loads a <2 x float> from each of %in0 and %in1 and compares them lane by lane
; with "une" (unordered or not equal). The stored vector holds the %in0 lane
; where the compare holds and the %in1 lane otherwise.
;   %out - addrspace(1) pointer that receives the selected vector
;   %in0 - addrspace(1) pointer to the first operand
;   %in1 - addrspace(1) pointer to the second operand
define amdgpu_kernel void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
  %first = load <2 x float>, <2 x float> addrspace(1)* %in0
  %second = load <2 x float>, <2 x float> addrspace(1)* %in1
  ; Per-lane i1 mask: set where the lanes differ or either one is a NaN.
  %differs = fcmp une <2 x float> %first, %second
  ; Both select operands are the loaded vectors themselves.
  %picked = select <2 x i1> %differs, <2 x float> %first, <2 x float> %second
  store <2 x float> %picked, <2 x float> addrspace(1)* %out
  ret void
}
49
50;FUNC-LABEL: {{^}}test_select_v4i32:
51
52; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
53; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
54; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
55; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
56
57; VI: s_cselect_b32
58; VI: s_cselect_b32
59; VI: s_cselect_b32
60; VI: s_cselect_b32
61
62; SI: v_cndmask_b32_e32
63; SI: v_cndmask_b32_e32
64; SI: v_cndmask_b32_e32
65; SI: v_cndmask_b32_e32
66
; Four-lane integer vector select.
; Loads a <4 x i32> from each of %in0 and %in1, compares them lane by lane with
; a signed greater-than, and stores a vector that takes %val in the lanes where
; the compare holds and the %in0 data in the remaining lanes.
;   %out - addrspace(1) pointer that receives the selected vector
;   %in0 - addrspace(1) pointer to the compare LHS (also the fallback operand)
;   %in1 - addrspace(1) pointer to the compare RHS
;   %val - by-value vector used for the lanes where the compare is true
define amdgpu_kernel void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1, <4 x i32> %val) {
entry:
  %lhs = load <4 x i32>, <4 x i32> addrspace(1)* %in0
  %rhs = load <4 x i32>, <4 x i32> addrspace(1)* %in1
  ; Per-lane i1 mask: set where %lhs is greater than %rhs (signed).
  %is_gt = icmp sgt <4 x i32> %lhs, %rhs
  ; Set lanes pick the kernel argument; clear lanes pass %lhs through.
  %picked = select <4 x i1> %is_gt, <4 x i32> %val, <4 x i32> %lhs
  store <4 x i32> %picked, <4 x i32> addrspace(1)* %out
  ret void
}
76
77;FUNC-LABEL: {{^}}test_select_v4f32:
78;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
79;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
80;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
81;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
82
83; SI: v_cndmask_b32_e32
84; SI: v_cndmask_b32_e32
85; SI: v_cndmask_b32_e32
86; SI: v_cndmask_b32_e32
; Four-lane float vector select.
; Loads a <4 x float> from each of %in0 and %in1 and compares them lane by lane
; with "une" (unordered or not equal). The stored vector holds the %in0 lane
; where the compare holds and the %in1 lane otherwise.
;   %out - addrspace(1) pointer that receives the selected vector
;   %in0 - addrspace(1) pointer to the first operand
;   %in1 - addrspace(1) pointer to the second operand
define amdgpu_kernel void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
entry:
  %first = load <4 x float>, <4 x float> addrspace(1)* %in0
  %second = load <4 x float>, <4 x float> addrspace(1)* %in1
  ; Per-lane i1 mask: set where the lanes differ or either one is a NaN.
  %differs = fcmp une <4 x float> %first, %second
  ; Both select operands are the loaded vectors themselves.
  %picked = select <4 x i1> %differs, <4 x float> %first, <4 x float> %second
  store <4 x float> %picked, <4 x float> addrspace(1)* %out
  ret void
}
96