; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
; REQUIRES: asserts

; @sharedidx is an unrolled variant of this loop:
;  for (unsigned long i = 0; i < len; i += s) {
;    c[i] = a[i] + b[i];
;  }
; where 's' cannot be folded into the addressing mode.
;
; This is not quite profitable to chain. But with -stress-ivchain, we
; can form three address chains in place of the shared induction
; variable.
;
; What the patterns below verify: in each of the four unrolled bodies the two
; byte loads (from %a and %b) are emitted as register-offset loads with
; writeback (the trailing '!'), i.e. each load advances its own base pointer
; by the stride instead of indexing from a shared counter. The store to %c is
; not pattern-matched by this test.
;
; The register pattern uses 'r[0-9]+' so that two-digit registers (r10-r12)
; are accepted as well as r0-r9 and lr; the test must not depend on which
; registers the allocator happens to pick.
;
; NOTE(review): -stress-ivchain appears to be available only in
; assertion-enabled builds of llc, which is why this test is restricted to
; such builds -- confirm against LoopStrengthReduce.cpp.

; rdar://10674430
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
entry:
; CHECK-LABEL: sharedidx:
  ; Skip the loop entirely when len == 0.
  %cmp8 = icmp eq i32 %len, 0
  br i1 %cmp8, label %for.end, label %for.body

; Unrolled iteration 0: index is the loop-carried phi %i.09.
for.body:                                         ; preds = %entry, %for.body.3
; CHECK: %for.body
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
  %0 = load i8, i8* %arrayidx, align 1
  %conv6 = zext i8 %0 to i32
  %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
  %1 = load i8, i8* %arrayidx1, align 1
  %conv27 = zext i8 %1 to i32
  %add = add nsw i32 %conv27, %conv6
  %conv3 = trunc i32 %add to i8
  %arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
  store i8 %conv3, i8* %arrayidx4, align 1
  ; Advance the shared index by the runtime stride %s and test against %len.
  %add5 = add i32 %i.09, %s
  %cmp = icmp ult i32 %add5, %len
  br i1 %cmp, label %for.body.1, label %for.end

for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
  ret void

; Unrolled iteration 1: index is %add5 (= i + s).
for.body.1:                                       ; preds = %for.body
; CHECK: %for.body.1
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
  %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
  %2 = load i8, i8* %arrayidx.1, align 1
  %conv6.1 = zext i8 %2 to i32
  %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
  %3 = load i8, i8* %arrayidx1.1, align 1
  %conv27.1 = zext i8 %3 to i32
  %add.1 = add nsw i32 %conv27.1, %conv6.1
  %conv3.1 = trunc i32 %add.1 to i8
  %arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
  store i8 %conv3.1, i8* %arrayidx4.1, align 1
  %add5.1 = add i32 %add5, %s
  %cmp.1 = icmp ult i32 %add5.1, %len
  br i1 %cmp.1, label %for.body.2, label %for.end

; Unrolled iteration 2: index is %add5.1 (= i + 2*s).
for.body.2:                                       ; preds = %for.body.1
; CHECK: %for.body.2
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
  %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
  %4 = load i8, i8* %arrayidx.2, align 1
  %conv6.2 = zext i8 %4 to i32
  %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
  %5 = load i8, i8* %arrayidx1.2, align 1
  %conv27.2 = zext i8 %5 to i32
  %add.2 = add nsw i32 %conv27.2, %conv6.2
  %conv3.2 = trunc i32 %add.2 to i8
  %arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
  store i8 %conv3.2, i8* %arrayidx4.2, align 1
  %add5.2 = add i32 %add5.1, %s
  %cmp.2 = icmp ult i32 %add5.2, %len
  br i1 %cmp.2, label %for.body.3, label %for.end

; Unrolled iteration 3: index is %add5.2 (= i + 3*s); its successor index
; %add5.3 feeds the phi in %for.body via the back edge.
for.body.3:                                       ; preds = %for.body.2
; CHECK: %for.body.3
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
  %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
  %6 = load i8, i8* %arrayidx.3, align 1
  %conv6.3 = zext i8 %6 to i32
  %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
  %7 = load i8, i8* %arrayidx1.3, align 1
  %conv27.3 = zext i8 %7 to i32
  %add.3 = add nsw i32 %conv27.3, %conv6.3
  %conv3.3 = trunc i32 %add.3 to i8
  %arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
  store i8 %conv3.3, i8* %arrayidx4.3, align 1
  %add5.3 = add i32 %add5.2, %s
  %cmp.3 = icmp ult i32 %add5.3, %len
  br i1 %cmp.3, label %for.body, label %for.end
}