; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s

; Loop-vectorizer test for AArch64.  Most loops below (add_a, add_b, add_c,
; add_e, add_f, add_g) load a narrow integer, widen it to i32, do arithmetic
; in i32 and truncate the result before storing it; add_d stores the i32
; result directly.  The FileCheck expectations pin the vector element type and
; lane count that the vectorized loop is expected to use for each loop.
;
; Interleaving is forced to 1 on the command line above, so only a single
; vector copy of each operation is expected per loop.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; add_a: q[i] = trunc_i8(zext_i32(p[i]) + 2), i8 in / i8 out.
; Expected: the add is performed on <16 x i8>.
; CHECK-LABEL: @add_a(
; CHECK: load <16 x i8>, <16 x i8>*
; CHECK: add nuw nsw <16 x i8>
; CHECK: store <16 x i8>
; Function Attrs: nounwind
define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
entry:
  ; Skip the loop entirely when len <= 0.
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i8
  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv1, i8* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The i64 induction variable is compared against the i32 trip count.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_b: same shape as add_a with i16 elements.
; Expected: the add is performed on <8 x i16>.
; CHECK-LABEL: @add_b(
; CHECK: load <8 x i16>, <8 x i16>*
; CHECK: add nuw nsw <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp9 = icmp sgt i32 %len, 0
  br i1 %cmp9, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv8 = zext i16 %0 to i32
  %add = add nuw nsw i32 %conv8, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
  store i16 %conv1, i16* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_c: i8 source, i16 destination.
; Expected: an <8 x i8> load, with the add and the store on <8 x i16>.
; CHECK-LABEL: @add_c(
; CHECK: load <8 x i8>, <8 x i8>*
; CHECK: add nuw nsw <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
  store i16 %conv1, i16* %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_d: i16 source sign-extended to i32; the i32 sum is stored without a
; truncate.
; Expected: a <4 x i16> load, with the add and the store on <4 x i32>.
; CHECK-LABEL: @add_d(
; CHECK: load <4 x i16>
; CHECK: add nsw <4 x i32>
; CHECK: store <4 x i32>
define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
entry:
  %cmp7 = icmp sgt i32 %len, 0
  br i1 %cmp7, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv = sext i16 %0 to i32
  %add = add nsw i32 %conv, 2
  %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_e: a longer chain (shl, add, or, mul, and, xor, mul) computed in i32 on
; a zero-extended i8 and on the zero-extended i8 arguments, then truncated
; back to i8.
; Expected: every operation of the chain is performed on <16 x i8>.
; CHECK-LABEL: @add_e(
; CHECK: load <16 x i8>
; CHECK: shl <16 x i8>
; CHECK: add nuw nsw <16 x i8>
; CHECK: or <16 x i8>
; CHECK: mul nuw nsw <16 x i8>
; CHECK: and <16 x i8>
; CHECK: xor <16 x i8>
; CHECK: mul nuw nsw <16 x i8>
; CHECK: store <16 x i8>
define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant widenings of the two i8 arguments.
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx
  %conv = zext i8 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nuw nsw i32 %add, 32
  %or = or i32 %conv, 51
  %mul = mul nuw nsw i32 %or, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv19, i8* %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_f: same chain as add_e, but the source elements are i16 (sign-extended)
; while the stored result is still i8.
; Expected: an <8 x i16> load that is truncated, with the chain on <8 x i8>.
; CHECK-LABEL: @add_f(
; CHECK: load <8 x i16>
; CHECK: trunc <8 x i16>
; CHECK: shl <8 x i8>
; CHECK: add nsw <8 x i8>
; CHECK: or <8 x i8>
; CHECK: mul nuw nsw <8 x i8>
; CHECK: and <8 x i8>
; CHECK: xor <8 x i8>
; CHECK: mul nuw nsw <8 x i8>
; CHECK: store <8 x i8>
define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant widenings of the two i8 arguments.
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
  %0 = load i16, i16* %arrayidx
  %conv = sext i16 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nsw i32 %add, 32
  %or = and i32 %conv, 204
  %conv8 = or i32 %or, 51
  %mul = mul nuw nsw i32 %conv8, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  store i8 %conv19, i8* %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; add_g: xor / unsigned compare / select computed in i32 on a zero-extended
; i8 and truncated back to i8.  The result is stored back to the element of
; %p that was loaded.
; Expected: xor, icmp and select are performed on 16 lanes of i8.
;
; %p is written through %x4 in the loop, so it deliberately does not carry
; the readonly attribute: writing through a readonly pointer argument is
; undefined behaviour.
; CHECK-LABEL: @add_g(
; CHECK: load <16 x i8>
; CHECK: xor <16 x i8>
; CHECK: icmp ult <16 x i8>
; CHECK: select <16 x i1> {{.*}}, <16 x i8>
; CHECK: store <16 x i8>
define void @add_g(i8* noalias nocapture %p, i8* noalias nocapture readonly %q, i8* noalias nocapture %r, i8 %arg1, i32 %len) #0 {
  %1 = icmp sgt i32 %len, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  ; %2 has no users.  It is kept because the loop block below is referred to
  ; by its implicit number (%3); removing %2 would renumber that block.
  %2 = sext i8 %arg1 to i64
  br label %3

._crit_edge:                                      ; preds = %3, %0
  ret void

; <label>:3                                       ; preds = %3, %.lr.ph
  %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
  %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
  %x5 = load i8, i8* %x4
  ; The load from %q has no users in this function.
  %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
  %x8 = load i8, i8* %x7
  %x9 = zext i8 %x5 to i32
  %x10 = xor i32 %x9, 255
  ; Unsigned min(%x10, 24).
  %x11 = icmp ult i32 %x10, 24
  %x12 = select i1 %x11, i32 %x10, i32 24
  %x13 = trunc i32 %x12 to i8
  store i8 %x13, i8* %x4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %._crit_edge, label %3
}

attributes #0 = { nounwind }