; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; GCN-LABEL: {{^}}udiv32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = udiv i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}urem32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = urem i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}sdiv32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = sdiv i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}srem32_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
; GCN: v_cvt_u32_f32_e32
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_mul_lo_i32
; GCN-DAG: v_sub_i32_e32
; GCN-DAG: v_cmp_eq_u32_e64
; GCN-DAG: v_cndmask_b32_e64
; GCN-DAG: v_mul_hi_u32
; GCN-DAG: v_add_i32_e32
; GCN-DAG: v_subrev_i32_e32
; GCN-DAG: v_cndmask_b32_e64
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = srem i32 %tmp, %arg1
  %tmp5 = zext i32 %tmp to i64
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
  store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
  %tmp7 = add nuw nsw i32 %tmp, 1
  %tmp8 = icmp eq i32 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}udiv16_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = udiv i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}urem16_invariant_denom:
; GCN: v_cvt_f32_u32
; GCN: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = urem i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}sdiv16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = sdiv i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}

; GCN-LABEL: {{^}}srem16_invariant_denom:
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
; GCN-DAG: v_cvt_f32_i32
; GCN-DAG: v_rcp_iflag_f32
; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_rcp
; GCN: s_cbranch_scc0 [[LOOP]]
; GCN: s_endpgm
define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
bb:
  br label %bb3

bb2:                                              ; preds = %bb3
  ret void

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
  %tmp4 = srem i16 %tmp, %arg1
  %tmp5 = zext i16 %tmp to i64
  %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
  store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
  %tmp7 = add nuw nsw i16 %tmp, 1
  %tmp8 = icmp eq i16 %tmp7, 1024
  br i1 %tmp8, label %bb2, label %bb3
}