; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s

; Verify that when widening a vector divide/remainder operation, we generate
; only one scalar divide/rem per element of the original vector. Extra lanes
; introduced by widening must not be divided, since divide/remainder can trap.
;
; Each function below expects exactly N divide instructions (N = number of
; vector elements) followed by the return.
;
; Note: the unsigned patterns (divb/divw/divl/divq) are matched as plain
; substrings, so they also accept the signed mnemonics (idivb/idivw/...).

; sdiv on <2 x i32> loaded from and stored to addrspace(1) memory, using
; unoptimized-style stack slots for the arguments. Expect two idivl.
define void @vectorDiv(<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
entry:
  %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
  %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
  %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
  %index = alloca i32, align 4
  store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
  store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
  store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
  ; Give the index slot a defined value; it is read three times below and
  ; would otherwise be loaded uninitialized.
  store i32 0, i32* %index
  ; Destination element address: &qdest[index].
  %tmp = load <2 x i32> addrspace(1)** %qdest.addr
  %tmp1 = load i32* %index
  %arrayidx = getelementptr <2 x i32> addrspace(1)* %tmp, i32 %tmp1
  ; Numerator: nsource[index].
  %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr
  %tmp3 = load i32* %index
  %arrayidx4 = getelementptr <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
  %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4
  ; Denominator: dsource[index].
  %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr
  %tmp7 = load i32* %index
  %arrayidx8 = getelementptr <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
  %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8
  ; The operation under test.
  %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
  store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
  ret void
}

; sdiv on <3 x i8>: expect three idivb.
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: idivb
; CHECK: idivb
; CHECK: idivb
; CHECK-NOT: idivb
; CHECK: ret
  %div.r = sdiv <3 x i8> %num, %div
  ret <3 x i8>  %div.r
}

; udiv on <3 x i8>: expect three divb.
define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: divb
; CHECK: divb
; CHECK: divb
; CHECK-NOT: divb
; CHECK: ret
  %div.r = udiv <3 x i8> %num, %div
  ret <3 x i8>  %div.r
}

; sdiv on <5 x i16>: expect five idivw.
define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %div.r = sdiv <5 x i16> %num, %div
  ret <5 x i16>  %div.r
}

; udiv on <4 x i16>: expect four divw.
define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
; CHECK: divw
; CHECK: divw
; CHECK: divw
; CHECK: divw
; CHECK-NOT: divw
; CHECK: ret
  %div.r = udiv <4 x i16> %num, %div
  ret <4 x i16>  %div.r
}

; udiv on <3 x i32>: expect three divl.
define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %div.r = udiv <3 x i32> %num, %div
  ret <3 x i32>  %div.r
}

; sdiv on <3 x i64>: expect three idivq.
define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: idivq
; CHECK: idivq
; CHECK: idivq
; CHECK-NOT: idivq
; CHECK: ret
  %div.r = sdiv <3 x i64> %num, %div
  ret <3 x i64>  %div.r
}

; udiv on <3 x i64>: expect three divq.
define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %div.r = udiv <3 x i64> %num, %div
  ret <3 x i64>  %div.r
}


; srem on <4 x i8>: expect four idivb.
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
; CHECK: idivb
; CHECK: idivb
; CHECK: idivb
; CHECK: idivb
; CHECK-NOT: idivb
; CHECK: ret
  %rem.r = srem <4 x i8> %num, %rem
  ret <4 x i8>  %rem.r
}

; srem on <5 x i16>: expect five idivw.
define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %rem.r = srem <5 x i16> %num, %rem
  ret <5 x i16>  %rem.r
}

; srem on <4 x i32>: expect four idivl.
; NOTE(review): despite the "uint" in its name, this function performs a
; signed remainder and expects the signed mnemonic. The name is kept as-is.
define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
  %rem.r = srem <4 x i32> %num, %rem
  ret <4 x i32>  %rem.r
}


; urem on <5 x i64>: expect five divq.
define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %rem.r = urem <5 x i64> %num, %rem
  ret <5 x i64>  %rem.r
}

; sdiv on <3 x i32> inside a counted loop that updates dest[i] in place with
; dest[i] / old[i] for i in [0, n). Expect three idivl in the generated code.
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp13 = icmp sgt i32 %n, 0
  br i1 %cmp13, label %bb.nph, label %for.end

bb.nph:
  br label %for.body

for.body:
  %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
  %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014
  %tmp4 = load <3 x i32>* %arrayidx11
  %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014
  %tmp8 = load <3 x i32>* %arrayidx7
  ; The operation under test.
  %div = sdiv <3 x i32> %tmp4, %tmp8
  store <3 x i32> %div, <3 x i32>* %arrayidx11
  %inc = add nsw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}