; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

declare i32 @llvm.amdgcn.workitem.id.x() #1

; Selects between a NaN constant and a float loaded from %fptr, depending on
; whether the kernel argument %c is non-zero.
; Register numbers are matched with [0-9]+ so the checks do not depend on the
; allocator picking a single-digit VGPR.

; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, -1, v{{[0-9]+}}, [[COND]]
; GCN-DAG: v{{[0-9]+}}
; All nan values are converted to 0xffffffff
; GCN: s_endpgm
define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
  %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
  %f = load float, float addrspace(1)* %f.gep
  %setcc = icmp ne i32 %c, 0
  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
  store float %select, float addrspace(1)* %out
  ret void
}


; This requires slightly trickier SGPR operand legalization since the
; single constant bus SGPR usage is the last operand, and it should
; never be moved.
; Same select as v_cnd_nan_nosgpr, except that the non-constant operand %f is
; passed directly as a kernel argument instead of being loaded.

; GCN-LABEL: {{^}}v_cnd_nan:
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}}, vcc
; GCN-DAG: v{{[0-9]+}}
; All nan values are converted to 0xffffffff
; GCN: s_endpgm
define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
  %setcc = icmp ne i32 %c, 0
  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
  store float %select, float addrspace(1)* %out
  ret void
}

; Test different compare and select operand types for optimal code
; shrinking.
; (select (cmp (sgprX, constant)), constant, sgprZ)

; select (fcmp one %x, 0.0), 1.0, %z where %x and %z are both kernel
; arguments.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 1.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; As the previous test, but the compared kernel argument %x is also the
; select's false operand.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc
define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 1.0, float %x
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; The select constant (0.0) is the same value as the compare constant; %x and
; %z are kernel arguments.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 0.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; Select constant equals the compare constant, and %x is used for both the
; compare and the select's false operand.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc
define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 0.0, float %x
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; %x is a kernel argument; %z is loaded from memory at a per-workitem address.
; The select constant is 0.0.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %z = load float, float addrspace(1)* %z.gep
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 0.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; As the previous test, but the select constant is 1.0.

; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %z = load float, float addrspace(1)* %z.gep
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, float 1.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; %x is loaded from memory at a per-workitem address; %z is a kernel argument.
; The IR predicate is olt.

; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %x = load float, float addrspace(1)* %x.gep
  %setcc = fcmp olt float %x, 0.0
  %select = select i1 %setcc, float 1.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; Both %x and %z are volatile loads from per-workitem addresses. The IR
; predicate is ult.

; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile float, float addrspace(1)* %z.gep
  %setcc = fcmp ult float %x, 0.0
  %select = select i1 %setcc, float 1.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; Integer variant: select (icmp slt %x, 0), 2, %z with both values loaded from
; memory.

; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %x = load volatile i32, i32 addrspace(1)* %x.gep
  %z = load volatile i32, i32 addrspace(1)* %z.gep
  %setcc = icmp slt i32 %x, 0
  %select = select i1 %setcc, i32 2, i32 %z
  store i32 %select, i32 addrspace(1)* %out.gep
  ret void
}

; FIXME: Why does VI make the wrong regalloc choice?
; 64-bit integer variant: select (icmp slt %x, 0), 2, %z with both values
; loaded from memory. The checks expect one v_cndmask per 32-bit half.

; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc

; VI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
  %x = load volatile i64, i64 addrspace(1)* %x.gep
  %z = load volatile i64, i64 addrspace(1)* %z.gep
  %setcc = icmp slt i64 %x, 0
  %select = select i1 %setcc, i64 2, i64 %z
  store i64 %select, i64 addrspace(1)* %out.gep
  ret void
}

; Scalar f32 compare driving a <4 x float> select. The constant vector is the
; select's false operand here.

; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dwordx4

; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
  %setcc = fcmp ugt float %x, 4.0
  %select = select i1 %setcc, <4 x float> %z, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>
  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
  ret void
}

; Same compare as the previous test, with the select operands exchanged so the
; constant vector is the true operand.

; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dwordx4

; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
  %setcc = fcmp ugt float %x, 4.0
  %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
  ret void
}

; This must be swapped as a vector type before the condition has
; multiple uses.
; Here the constant is the first operand of the IR compare.

; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dwordx4

; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
  %setcc = fcmp ugt float 4.0, %x
  %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
  ret void
}

; i1 select: select (icmp slt %x, 0), true, %z with %z loaded as an i1.
; The v_and_b32 check below previously carried a misspelled prefix, so
; FileCheck never evaluated it; it now uses the same prefix as its neighbours.

; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
; GCN: load_dword
; GCN: load_ubyte
; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
; GCN: store_byte
define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext
  %x = load volatile i32, i32 addrspace(1)* %x.gep
  %z = load volatile i1, i1 addrspace(1)* %z.gep
  %setcc = icmp slt i32 %x, 0
  %select = select i1 %setcc, i1 true, i1 %z
  store i1 %select, i1 addrspace(1)* %out.gep
  ret void
}

; Different types compared vs. selected
; f32 compare selecting between double 1.0 and a loaded double.
; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dwordx2

; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile double, double addrspace(1)* %z.gep
  %setcc = fcmp ult float %x, 0.0
  %select = select i1 %setcc, double 1.0, double %z
  store double %select, double addrspace(1)* %out.gep
  ret void
}

; Different types compared vs.
; selected
; f32 compare (one against 0.0) selecting between i64 3 and a loaded i64.
; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dwordx2

; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile i64, i64 addrspace(1)* %z.gep
  %setcc = fcmp one float %x, 0.0
  %select = select i1 %setcc, i64 3, i64 %z
  store i64 %select, i64 addrspace(1)* %out.gep
  ret void
}

; Different types compared vs.
; selected
; i32 compare (ugt 1) selecting between float 4.0 and a loaded float.
; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]

; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %x = load volatile i32, i32 addrspace(1)* %x.gep
  %z = load volatile float, float addrspace(1)* %z.gep
  %setcc = icmp ugt i32 %x, 1
  %select = select i1 %setcc, float 4.0, float %z
  store float %select, float addrspace(1)* %out.gep
  ret void
}

; FIXME: Should be able to handle multiple uses
; One compare result feeds two selects with different constants (-1.0 and
; -2.0); the checks currently expect the e64 form with the constant as the
; second source operand.

; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc
define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %x = load volatile float, float addrspace(1)* %x.gep
  %z = load volatile float, float addrspace(1)* %z.gep
  %setcc = fcmp ugt float 4.0, %x
  %select0 = select i1 %setcc, float -1.0, float %z
  %select1 = select i1 %setcc, float -2.0, float %z
  store volatile float %select0, float addrspace(1)* %out.gep
  store volatile float %select1, float addrspace(1)* %out.gep
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }