; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Test expansion of scalar selects on vectors.
; Evergreen not enabled since it seems to be having problems with doubles.
;
; Every test below feeds a single scalar i1 condition to a `select` whose
; operands are vectors, and checks how many v_cndmask_b32_e32 instructions
; appear in the generated code.


; <4 x i8> operands; the condition is `icmp eq i8 %c, 0`.
; Four v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v4i8:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
  %cmp = icmp eq i8 %c, 0
  %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; <4 x i16> operands; the condition is `icmp eq i32 %c, 0`.
; Four v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v4i16:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4
  ret void
}

; FIXME: Expansion with bitwise operations may be better if doing a
; vector select with SGPR inputs.
; <2 x i32> operands passed as function arguments; the condition is
; `icmp eq i32 %c, 0`. Two v_cndmask_b32_e32 followed by a dwordx2 store
; are expected.
; FUNC-LABEL: {{^}}s_select_v2i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> operands passed as function arguments. Four v_cndmask_b32_e32
; followed by a dwordx4 store are expected.
; FUNC-LABEL: {{^}}s_select_v4i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx4
define void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Selects between a <4 x i32> loaded from %in and zeroinitializer; the
; condition is `icmp ult i32 %cond, 32`. The checks expect the dwordx4 load,
; a v_cmp_gt_u32 against 32, four v_cndmask_b32_e32 with a 0 operand, and a
; dwordx4 store.
; FUNC-LABEL: {{^}}v_select_v4i32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
bb:
  %tmp2 = icmp ult i32 %cond, 32
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %tmp3 = select i1 %tmp2, <4 x i32> %val, <4 x i32> zeroinitializer
  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> operands. Eight v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v8i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16
  ret void
}

; <2 x float> operands passed as function arguments. The checks capture the
; two s_load_dwordx2 results (ALO/AHI and BLO/BHI), expect each half to be
; copied with v_mov_b32_e32, expect a v_cmp_eq_i32 against 0, and then two
; v_cndmask_b32_e32 followed by a dwordx2 store.
; NOTE(review): the store below uses align 16 although <2 x float> is 8 bytes
; wide and s_select_v2i32 stores with align 8 - confirm this is intentional.
; FUNC-LABEL: {{^}}s_select_v2f32:
; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}

; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x float> %a, <2 x float> %b
  store <2 x float> %select, <2 x float> addrspace(1)* %out, align 16
  ret void
}

; <4 x float> operands passed as function arguments. The checks expect two
; s_load_dwordx4, a v_cmp_eq_i32 against 0, four v_cndmask_b32_e32, and a
; dwordx4 store.
; FUNC-LABEL: {{^}}s_select_v4f32:
; SI: s_load_dwordx4
; SI: s_load_dwordx4
; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32

; SI: buffer_store_dwordx4
define void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x float> %a, <4 x float> %b
  store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Floating-point twin of v_select_v4i32: selects between a <4 x float>
; loaded from %in and zeroinitializer under `icmp ult i32 %cond, 32`.
; FUNC-LABEL: {{^}}v_select_v4f32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
bb:
  %tmp2 = icmp ult i32 %cond, 32
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %tmp3 = select i1 %tmp2, <4 x float> %val, <4 x float> zeroinitializer
  store <4 x float> %tmp3, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float> operands. Eight v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v8f32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x float> %a, <8 x float> %b
  store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16
  ret void
}

; <2 x double> operands. Four v_cndmask_b32_e32 are expected (two per
; 64-bit element).
; FUNC-LABEL: {{^}}select_v2f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <2 x double> %a, <2 x double> %b
  store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; <4 x double> operands. Eight v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v4f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <4 x double> %a, <4 x double> %b
  store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16
  ret void
}

; <8 x double> operands. Sixteen v_cndmask_b32_e32 are expected.
; FUNC-LABEL: {{^}}select_v8f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, <8 x double> %a, <8 x double> %b
  store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16
  ret void
}

; NOTE(review): no function visible in this file calls this intrinsic, so the
; declaration and attribute group #1 look unused - confirm before removing.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }