; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for unsigned divide and remainder whose operands are at most
; 24 bits wide, plus negative cases where an operand may need 25 bits.
; Every kernel reads the numerator from %in[0] and the denominator from %in[1],
; then stores the single result to %out.

; 8-bit unsigned divide. The checks expect two integer-to-float conversions,
; a float reciprocal, and a conversion back to unsigned integer.
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %denom.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %numer = load i8, i8 addrspace(1)* %in
  %denom = load i8, i8 addrspace(1)* %denom.addr
  %quot = udiv i8 %numer, %denom
  store i8 %quot, i8 addrspace(1)* %out
  ret void
}

; 16-bit unsigned divide; same float-reciprocal sequence is expected, using the
; 32-bit unsigned conversion on amdgcn.
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %denom.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %numer = load i16, i16 addrspace(1)* %in, align 2
  %denom = load i16, i16 addrspace(1)* %denom.addr, align 2
  %quot = udiv i16 %numer, %denom
  store i16 %quot, i16 addrspace(1)* %out, align 2
  ret void
}

; 32-bit unsigned divide where a shl/lshr-by-8 pair clears the top 8 bits of
; both operands, leaving at most 24 significant bits in each.
; FUNC-LABEL: {{^}}udiv24_i32:
; SI: v_cvt_f32_u32
; SI-DAG: v_cvt_f32_u32
; SI-DAG: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 8
  %denom.shl = shl i32 %denom, 8
  %numer.u24 = lshr i32 %numer.shl, 8
  %denom.u24 = lshr i32 %denom.shl, 8
  %quot = udiv i32 %numer.u24, %denom.u24
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative case: shifting by 7 clears only the top 7 bits, so each operand may
; still need 25 bits and the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}udiv25_i32:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 7
  %denom.shl = shl i32 %denom, 7
  %numer.u25 = lshr i32 %numer.shl, 7
  %denom.u25 = lshr i32 %denom.shl, 7
  %quot = udiv i32 %numer.u25, %denom.u25
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative case: the numerator is limited to 24 bits but the denominator may
; need 25, so the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 8
  %denom.shl = shl i32 %denom, 7
  %numer.u24 = lshr i32 %numer.shl, 8
  %denom.u25 = lshr i32 %denom.shl, 7
  %quot = udiv i32 %numer.u24, %denom.u25
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative case: the denominator is limited to 24 bits but the numerator may
; need 25, so the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 7
  %denom.shl = shl i32 %denom, 8
  %numer.u25 = lshr i32 %numer.shl, 7
  %denom.u24 = lshr i32 %denom.shl, 8
  %quot = udiv i32 %numer.u25, %denom.u24
  store i32 %quot, i32 addrspace(1)* %out, align 4
  ret void
}

; 8-bit unsigned remainder; the same float-reciprocal sequence as the 8-bit
; divide is expected.
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %denom.addr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %numer = load i8, i8 addrspace(1)* %in
  %denom = load i8, i8 addrspace(1)* %denom.addr
  %rem = urem i8 %numer, %denom
  store i8 %rem, i8 addrspace(1)* %out
  ret void
}

; 16-bit unsigned remainder; the same float-reciprocal sequence as the 16-bit
; divide is expected.
; FUNC-LABEL: {{^}}urem24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %denom.addr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %numer = load i16, i16 addrspace(1)* %in, align 2
  %denom = load i16, i16 addrspace(1)* %denom.addr, align 2
  %rem = urem i16 %numer, %denom
  store i16 %rem, i16 addrspace(1)* %out, align 2
  ret void
}

; 32-bit unsigned remainder where a shl/lshr-by-8 pair clears the top 8 bits of
; both operands, leaving at most 24 significant bits in each.
; FUNC-LABEL: {{^}}urem24_i32:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 8
  %denom.shl = shl i32 %denom, 8
  %numer.u24 = lshr i32 %numer.shl, 8
  %denom.u24 = lshr i32 %denom.shl, 8
  %rem = urem i32 %numer.u24, %denom.u24
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative remainder case: shifting by 7 clears only the top 7 bits, so each
; operand may still need 25 bits and the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}urem25_i32:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 7
  %denom.shl = shl i32 %denom, 7
  %numer.u25 = lshr i32 %numer.shl, 7
  %denom.u25 = lshr i32 %denom.shl, 7
  %rem = urem i32 %numer.u25, %denom.u25
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative remainder case: the numerator is limited to 24 bits but the
; denominator may need 25, so the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 8
  %denom.shl = shl i32 %denom, 7
  %numer.u24 = lshr i32 %numer.shl, 8
  %denom.u25 = lshr i32 %denom.shl, 7
  %rem = urem i32 %numer.u24, %denom.u25
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative remainder case: the denominator is limited to 24 bits but the
; numerator may need 25, so the 24-bit sequence must not appear.
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; The full 32-bit algorithm computes URECIP with RCP_IFLAG.
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %denom.addr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %numer = load i32, i32 addrspace(1)* %in, align 4
  %denom = load i32, i32 addrspace(1)* %denom.addr, align 4
  %numer.shl = shl i32 %numer, 7
  %denom.shl = shl i32 %denom, 8
  %numer.u25 = lshr i32 %numer.shl, 7
  %denom.u24 = lshr i32 %denom.shl, 8
  %rem = urem i32 %numer.u25, %denom.u24
  store i32 %rem, i32 addrspace(1)* %out, align 4
  ret void
}