; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

; Checks instruction selection for two families of "icmp sgt + select" idioms:
;   * integer abs, written as  select (x > 0 - x), x, 0 - x
;   * paired signed min/max, written as two selects sharing one compare
; Functions named s_* take the value directly as a kernel argument; functions
; named v_* load it from global memory. Every FileCheck variable is defined and
; consumed inside a single function, so all runs use -enable-var-scope.

; abs of an i32 kernel argument, followed by "+ 2" so the abs result has a user.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32

; EG: MAX_INT
define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; abs of an i32 loaded from %src[workitem.id.x]; expected as a negate (0 - x)
; followed by a signed max of the value and its negation.
; FUNC-LABEL: {{^}}v_abs_i32:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2

; EG: MAX_INT
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in, align 4
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; The abs result feeds both operands of a multiply; the checks require that one
; max result is reused for both multiply operands.
; FUNC-LABEL: {{^}}v_abs_i32_repeat_user:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in, align 4
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %mul = mul i32 %res, %res
  store i32 %mul, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> abs of a kernel argument; one s_abs_i32 / s_add_i32 per element.
; %z1 is the all-zero vector and %t1 the all-2 vector, built with insertelement.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> abs of a value loaded from %src[workitem.id.x]; one negate + max
; pair per element, matched in any order via the -DAG checks.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> abs of a kernel argument; the checks require one s_abs_i32 and one
; s_add_i32 per element.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> abs of a value loaded from %src[workitem.id.x]; one negate + max
; pair per element, matched in any order via the -DAG checks.
; FUNC-LABEL: {{^}}v_abs_v4i32:

; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; One signed compare of two i32 kernel arguments drives two selects with
; swapped operands; the checks expect an s_min_i32 / s_max_i32 pair on the two
; loaded argument registers.
; FUNC-LABEL: {{^}}s_min_max_i32:
; GCN: s_load_dword [[VAL0:s[0-9]+]]
; GCN: s_load_dword [[VAL1:s[0-9]+]]

; GCN-DAG: s_min_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: s_max_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32], i32 %val0, [8 x i32], i32 %val1) nounwind {
  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; Same min/max pair as above, but both operands come from volatile loads; the
; checks expect v_min_i32 / v_max_i32 on the two loaded registers.
; FUNC-LABEL: {{^}}v_min_max_i32:
; GCN: {{buffer|flat|global}}_load_dword [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[VAL1:v[0-9]+]]

; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; <4 x i32> min/max pair on kernel arguments; four s_min_i32 and four s_max_i32
; are expected, in any order.
; FUNC-LABEL: {{^}}s_min_max_v4i32:
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
define amdgpu_kernel void @s_min_max_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %val0, <4 x i32> %val1) nounwind {
  %cond0 = icmp sgt <4 x i32> %val0, %val1
  %sel0 = select <4 x i1> %cond0, <4 x i32> %val0, <4 x i32> %val1
  %sel1 = select <4 x i1> %cond0, <4 x i32> %val1, <4 x i32> %val0

  store volatile <4 x i32> %sel0, <4 x i32> addrspace(1)* %out0, align 4
  store volatile <4 x i32> %sel1, <4 x i32> addrspace(1)* %out1, align 4
  ret void
}

; Here the i1 compare result is itself stored, giving it a user besides the two
; selects. The checks expect an explicit compare plus v_cndmask selects (and a
; v_cndmask materializing the i1 as 0/1) rather than min/max instructions.
; FUNC-LABEL: {{^}}v_min_max_i32_user:
; GCN: v_cmp_gt_i32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  store volatile i1 %cond0, i1 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }