1; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \ 2; RUN: -hexagon-bit=0 < %s | FileCheck %s 3 4; This spill should be eliminated. 5; CHECK-NOT: vmem(r29+#6) 6 7define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 { 8entry: 9 %0 = bitcast i8* %key to <32 x i32>* 10 %1 = bitcast i8* %data1 to <32 x i32>* 11 br label %for.body 12 13for.body: 14 %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ] 15 %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ] 16 %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ] 17 %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ] 18 %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128 19 %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128 20 %4 = load <32 x i32>, <32 x i32>* undef, align 128 21 %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2 22 %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128 23 %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2 24 %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128 25 %7 = load <32 x i32>, <32 x i32>* null, align 128 26 %8 = load <32 x i32>, <32 x i32>* undef, align 128 27 %9 = load <32 x i32>, <32 x i32>* null, align 128 28 %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3 29 %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6 30 %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128 31 %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4 32 %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128 33 %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8 34 %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8 35 %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128 36 %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5 37 %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128 38 %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10 39 %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10 40 %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128 41 %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6 42 %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128 43 %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12 44 %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128 45 %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12 46 %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128 47 %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14 48 %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128 49 %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14 50 %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128 51 %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11) 52 %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2) 53 %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11) 54 %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3) 55 %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef) 56 %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15) 57 %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7) 58 %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15) 59 %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8) 60 %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16) 61 %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9) 62 %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17) 63 %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13) 64 %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4) 65 %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13) 66 %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5) 67 %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef) 68 %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6) 69 %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14) 70 %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef) 71 %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer) 72 %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10) 73 %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18) 74 %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef) 75 %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19) 76 %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26) 77 %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21) 78 %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26) 79 %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23) 80 %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28) 81 %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24) 82 %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30) 83 %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27) 84 %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22) 85 %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27) 86 %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef) 87 %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31) 88 %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39) 89 %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33) 90 %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39) 91 %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35) 92 %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37) 93 %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40) 94 %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36) 95 %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44) 96 %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58) 97 %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46) 98 %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48) 99 %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50) 100 %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59) 101 %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer) 102 %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer) 103 %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55) 104 %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef) 105 %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64) 106 %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67) 107 %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef) 108 %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72) 109 %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer) 110 %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537) 111 %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537) 112 %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537) 113 %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537) 114 %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537) 115 %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537) 116 %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537) 117 %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537) 118 store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128 119 store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128 120 store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128 121 store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128 122 store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128 123 store <32 x i32> zeroinitializer, <32 x i32>* null, align 128 124 %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16 125 %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16 126 br i1 false, label %for.end, label %for.body 127 128for.end: 129 %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86) 130 ret void 131} 132 133declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1 134 135declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1 136 137declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1 138 139declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1 140 141declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1 142 143attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 144attributes #1 = { nounwind readnone } 145