; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Every kernel below applies one operation to the result of a select and
; stores the value through %p. The checks look for a single select over
; already-folded operands in the generated code.

; and(%y, select(%x < 11, 0, -1)): a scalar select feeds the store and no
; vector AND may appear in between.
; GCN-LABEL: {{^}}select_and1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 0, i32 -1
  %masked = and i32 %y, %mask
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_and1 with the AND operands commuted.
; GCN-LABEL: {{^}}select_and2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 0, i32 -1
  %masked = and i32 %mask, %y
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_and1 with the select arms swapped (-1 when true, 0 when false).
; GCN-LABEL: {{^}}select_and3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 -1, i32 0
  %masked = and i32 %y, %mask
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> variant of the AND case: four per-lane conditional moves against 0
; and no vector AND before the store.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %masked = and <4 x i32> %mask, %y
  store <4 x i32> %masked, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; or(%y, select(%x < 11, 0, -1)): a scalar select feeds the store and no
; vector OR may appear in between.
; GCN-LABEL: {{^}}select_or1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 0, i32 -1
  %merged = or i32 %y, %mask
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_or1 with the OR operands commuted.
; GCN-LABEL: {{^}}select_or2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 0, i32 -1
  %merged = or i32 %mask, %y
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}

; Same as select_or1 with the select arms swapped (-1 when true, 0 when false).
; GCN-LABEL: {{^}}select_or3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, i32 -1, i32 0
  %merged = or i32 %y, %mask
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}

; <4 x i32> variant of the OR case: four per-lane conditional moves against -1
; and no vector OR before the store.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %is_lt = icmp slt i32 %x, 11
  %mask = select i1 %is_lt, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %merged = or <4 x i32> %mask, %y
  store <4 x i32> %merged, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; 5 - select(%cond, -4, 3): the select should pick between 9 and 2 directly.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i32 -4, i32 3
  %result = sub i32 5, %choice
  store i32 %result, i32 addrspace(1)* %p, align 4
  ret void
}

; i16 form of the subtraction above; the same constants 9 and 2 are expected,
; here as operands of a vector conditional move.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i16 -4, i16 3
  %result = sub i16 5, %choice
  store i16 %result, i16 addrspace(1)* %p, align 2
  ret void
}

; 1 - select(%cond, 4, 3000) in i16: results are -3 and -2999. The latter is
; expected to be materialized as the 32-bit literal 0xfffff449.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i16 4, i16 3000
  %result = sub i16 1, %choice
  store i16 %result, i16 addrspace(1)* %p, align 2
  ret void
}

; <5, 7> - select(%cond, <-4, 2>, <3, 1>) in <2 x i16>: the packed results are
; <9, 5> = 0x50009 and <2, 6> = 0x60002.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: s_mov_b32 [[T:s[0-9]+]], 0x50009
; GCN: s_cselect_b32 s{{[0-9]+}}, [[T]], 0x60002
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %result = sub <2 x i16> <i16 5, i16 7>, %choice
  store <2 x i16> %result, <2 x i16> addrspace(1)* %p, align 4
  ret void
}

; <5, 7, 9, 11> - select(%cond, <-4, 2, 3, 4>, <3, 1, -1, -3>): per lane the
; results are <9, 5, 6, 7> when true and <2, 6, 10, 14> when false.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %result = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %choice
  store <4 x i32> %result, <4 x i32> addrspace(1)* %p, align 32
  ret void
}

; 120 sdiv select(%cond, 121, 23) in i64: the quotients are 0 and 5.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @sdiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i64 121, i64 23
  %result = sdiv i64 120, %choice
  store i64 %result, i64 addrspace(1)* %p, align 8
  ret void
}

; 184 sdiv select(%cond, 7, 23) in i32: the quotients are 26 and 8.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i32:
; GCN: s_cselect_b32 s{{[0-9]+}}, 26, 8
define amdgpu_kernel void @sdiv_constant_sel_constants_i32(i32 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i32 7, i32 23
  %result = sdiv i32 184, %choice
  store i32 %result, i32 addrspace(1)* %p, align 8
  ret void
}

; 120 udiv select(%cond, -4, 23) in i64: -4 is a very large unsigned divisor,
; so the quotients are 0 and 5.
; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i64 -4, i64 23
  %result = udiv i64 120, %choice
  store i64 %result, i64 addrspace(1)* %p, align 8
  ret void
}

; 33 srem select(%cond, 34, 15) in i64: the remainders are 33 and 3.
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i64 34, i64 15
  %result = srem i64 33, %choice
  store i64 %result, i64 addrspace(1)* %p, align 8
  ret void
}

; 33 urem select(%cond, 34, 15) in i64: the remainders are 33 and 3.
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i64 34, i64 15
  %result = urem i64 33, %choice
  store i64 %result, i64 addrspace(1)* %p, align 8
  ret void
}

; 1 shl select(%cond, 2, 3): the results are 4 and 8.
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 4, 8
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %result = shl i32 1, %choice
  store i32 %result, i32 addrspace(1)* %p, align 4
  ret void
}

; 64 lshr select(%cond, 2, 3): the results are 16 and 8.
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 16, 8
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %result = lshr i32 64, %choice
  store i32 %result, i32 addrspace(1)* %p, align 4
  ret void
}

; 128 ashr select(%cond, 2, 3): the results are 32 and 16.
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 32, 16
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %result = ashr i32 128, %choice
  store i32 %result, i32 addrspace(1)* %p, align 4
  ret void
}

; -1.0 fsub select(%cond, -2.0, 3.0): the results are 1.0 and -4.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, float -2.0, float 3.0
  %result = fsub float -1.0, %choice
  store float %result, float addrspace(1)* %p, align 4
  ret void
}

; Half-precision form of the fsub case: 1.0 (0x3c00) and -4.0 (0xc400) are
; currently expected to be moved into registers before the conditional move.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it shall be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, half -2.0, half 3.0
  %result = fsub half -1.0, %choice
  store half %result, half addrspace(1)* %p, align 2
  ret void
}

; <-1.0, 2.0> fsub select(%cond, <-2.0, -3.0>, <-1.0, 4.0>) in <2 x half>:
; the packed results are <1.0, 5.0> = 0x45003c00 and <0.0, -2.0>, whose bit
; pattern 0xc0000000 prints as the 32-bit constant -2.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0x45003c00, -2.0
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %result = fsub <2 x half> <half -1.0, half 2.0>, %choice
  store <2 x half> %result, <2 x half> addrspace(1)* %p, align 4
  ret void
}

; <-1.0, 2.0, 5.0, 8.0> fsub select(%cond, <-2, -3, -4, -5>, <-1, 0, 1, 2>):
; per lane the results are <1, 5, 9, 13> when true and <0, 2, 4, 6> when false.
; 5.0, 9.0, 13.0 and 6.0 are expected as literals moved into registers.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %result = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %choice
  store <4 x float> %result, <4 x float> addrspace(1)* %p, align 32
  ret void
}

; 8.0 fdiv select(%cond, -4.0, 2.0): the quotients are -2.0 and 4.0.
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, float -4.0, float 2.0
  %result = fdiv float 8.0, %choice
  store float %result, float addrspace(1)* %p, align 4
  ret void
}

; 5.0 frem select(%cond, -4.0, 3.0): the remainders are 1.0 and 2.0.
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %choice = select i1 %cond, float -4.0, float 3.0
  %result = frem float 5.0, %choice
  store float %result, float addrspace(1)* %p, align 4
  ret void
}