; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=no -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-tiling=0 -polly-parallel -polly-opt-outer-coincidence=yes -analyze < %s | FileCheck %s --check-prefix=OUTER

; By skewing, the diagonal can be made parallel. ISL does this when the
; 'outer_coincidence' option is enabled.
;
; The two invocations above run the same input with the option set to "no"
; (verified with the default prefix) and to "yes" (verified with the OUTER
; prefix). Each iteration reads A[i-1][j] and A[i][j-1], so both original
; loops carry a dependence.
;
; void func(int m, int n, float A[static const restrict m][n]) {
;   for (int i = 1; i < m; i+=1)
;     for (int j = 1; j < n; j+=1)
;       A[i][j] = A[i-1][j] + A[i][j-1];
;}

define void @func(i64 %m, i64 %n, float* noalias nonnull %A) #0 {
entry:
  br label %for.cond

; Outer loop header: i starts at 1 and runs while i < m.
for.cond:                                         ; preds = %for.inc11, %entry
  %i.0 = phi i64 [ 1, %entry ], [ %add12, %for.inc11 ]
  %cmp = icmp slt i64 %i.0, %m
  br i1 %cmp, label %for.cond1.preheader, label %for.end13

for.cond1.preheader:                              ; preds = %for.cond
  br label %for.cond1

; Inner loop header: j starts at 1 and runs while j < n.
for.cond1:                                        ; preds = %for.cond1.preheader, %for.body3
  %j.0 = phi i64 [ %add10, %for.body3 ], [ 1, %for.cond1.preheader ]
  %cmp2 = icmp slt i64 %j.0, %n
  br i1 %cmp2, label %for.body3, label %for.inc11

for.body3:                                        ; preds = %for.cond1
  ; Load A[i-1][j], addressed as A + (i-1)*n + j.
  %sub = add nsw i64 %i.0, -1
  %tmp = mul nsw i64 %sub, %n
  %arrayidx = getelementptr inbounds float, float* %A, i64 %tmp
  %arrayidx4 = getelementptr inbounds float, float* %arrayidx, i64 %j.0
  %tmp13 = load float, float* %arrayidx4, align 4
  ; Load A[i][j-1], addressed as A + i*n + (j-1).
  %sub5 = add nsw i64 %j.0, -1
  %tmp14 = mul nsw i64 %i.0, %n
  %arrayidx6 = getelementptr inbounds float, float* %A, i64 %tmp14
  %arrayidx7 = getelementptr inbounds float, float* %arrayidx6, i64 %sub5
  %tmp15 = load float, float* %arrayidx7, align 4
  %add = fadd float %tmp13, %tmp15
  ; Store the sum to A[i][j], addressed as A + i*n + j.
  %tmp16 = mul nsw i64 %i.0, %n
  %arrayidx8 = getelementptr inbounds float, float* %A, i64 %tmp16
  %arrayidx9 = getelementptr inbounds float, float* %arrayidx8, i64 %j.0
  store float %add, float* %arrayidx9, align 4
  %add10 = add nuw nsw i64 %j.0, 1
  br label %for.cond1

for.inc11:                                        ; preds = %for.cond1
  %add12 = add nuw nsw i64 %i.0, 1
  br label %for.cond

for.end13:                                        ; preds = %for.cond
  ret void
}


; Expected AST with the option disabled: a two-level loop nest in which
; neither loop is annotated as parallel.
; CHECK:      #pragma minimal dependence distance: 1
; CHECK-NEXT: for (int c0 = 0; c0 < m - 1; c0 += 1)
; CHECK-NEXT:   #pragma minimal dependence distance: 1
; CHECK-NEXT:   for (int c1 = 0; c1 < n - 1; c1 += 1)
; CHECK-NEXT:     Stmt_for_body3(c0, c1);

; Expected AST with the option enabled: the statement is invoked as
; (c1, c0 - c1) and the inner loop is annotated simd / known-parallel.
; OUTER:      #pragma minimal dependence distance: 1
; OUTER-NEXT: for (int c0 = 0; c0 < m + n - 3; c0 += 1)
; OUTER-NEXT:   #pragma simd
; OUTER-NEXT:   #pragma known-parallel
; OUTER-NEXT:   for (int c1 = max(0, -n + c0 + 2); c1 <= min(m - 2, c0); c1 += 1)
; OUTER-NEXT:     Stmt_for_body3(c1, c0 - c1);
