; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Signed division / remainder where the operands are (or are not) known to be
; sign-extended from at most 24 bits. Kernels whose operands are narrow enough
; are checked for an int-to-float conversion plus reciprocal sequence; kernels
; with a wider operand are checked for the absence of that sequence.
;
; Every kernel reads the numerator from %in[0] and the denominator from %in[1],
; and writes the single result to %out.

; --- sdiv, i8 operands -------------------------------------------------------
; FUNC-LABEL: {{^}}sdiv24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = sdiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; --- sdiv, i16 operands ------------------------------------------------------
; FUNC-LABEL: {{^}}sdiv24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = sdiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; --- sdiv, i32 operands sign-extended from 24 bits ---------------------------
; The shl 8 / ashr 8 pair leaves each operand sign-extended from its low
; 24 bits.
; FUNC-LABEL: {{^}}sdiv24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- sdiv, both operands sign-extended from 25 bits (negative test) ----------
; shl 7 / ashr 7 keeps 25 significant bits, one more than the 24-bit cases.
; FUNC-LABEL: {{^}}sdiv25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- sdiv, 24-bit numerator, 25-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- sdiv, 25-bit numerator, 24-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, i8 operands -------------------------------------------------------
; FUNC-LABEL: {{^}}srem24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = srem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; --- srem, i16 operands ------------------------------------------------------
; FUNC-LABEL: {{^}}srem24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = srem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; --- srem, i32 operands sign-extended from 24 bits ---------------------------
; FUNC-LABEL: {{^}}srem24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, both operands sign-extended from 25 bits (negative test) ----------
; FUNC-LABEL: {{^}}no_srem25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = srem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- sdiv, 24-bit numerator, 25-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}no_sdiv25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = sdiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- sdiv, 25-bit numerator, 24-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}no_sdiv25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, 24-bit numerator, 25-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}no_srem25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = srem i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, 25-bit numerator, 24-bit denominator (negative test) --------------
; FUNC-LABEL: {{^}}no_srem25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, 24-bit numerator, 11-bit denominator ------------------------------
; shl 21 / ashr 21 leaves the denominator sign-extended from 11 bits. The
; amdgcn checks additionally expect a v_bfe_i32 whose trailing operands are
; "0, 24".
; FUNC-LABEL: {{^}}srem25_i24_i11_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i24_i11_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i11.0 = shl i32 %den, 21
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i11 = ashr i32 %den.i11.0, 21
  %result = srem i32 %num.i24, %den.i11
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- srem, 11-bit numerator, 24-bit denominator ------------------------------
; Mirror image of the previous kernel: the narrow operand is the numerator.
; FUNC-LABEL: {{^}}srem25_i11_i24_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i11_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i11.0 = shl i32 %num, 21
  %den.i24.0 = shl i32 %den, 8
  %num.i11 = ashr i32 %num.i11.0, 21
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i11, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; --- 17-bit numerator, 12-bit denominator ------------------------------------
; shl 15 / ashr 15 keeps 17 bits of the numerator; shl 20 / ashr 20 keeps
; 12 bits of the denominator. The amdgcn checks expect a v_bfe_i32 whose
; trailing operands are "0, 17".
; NOTE(review): the kernel is named srem25_* but the body performs sdiv, not
; srem. Confirm which operation was intended before changing it; the expected
; v_bfe_i32 operands may depend on the choice.
; FUNC-LABEL: {{^}}srem25_i17_i12_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 17

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i17_i12_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i17.0 = shl i32 %num, 15
  %den.i12.0 = shl i32 %den, 20
  %num.i17 = ashr i32 %num.i17.0, 15
  %den.i12 = ashr i32 %den.i12.0, 20
  %result = sdiv i32 %num.i17, %den.i12
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}