; RUN: opt %loadPolly -analyze -polly-process-unprofitable -polly-remarks-minimal \
; RUN:     -polly-opt-isl -polly-pattern-matching-based-opts=true \
; RUN:     -polly-target-throughput-vector-fma=1 \
; RUN:     -polly-target-latency-vector-fma=1 \
; RUN:     -polly-ast -polly-target-vector-register-bitwidth=4096 \
; RUN:     -polly-target-1st-cache-level-associativity=3 < %s | FileCheck %s
;
;    /* Test that Polly does not crash due to configurations that can lead to
;       incorrect tile size computations.
;       The parameters are set up such that Car in `getMacroKernelParams`
;       is evaluated to 0. */
;
;    static const int N = 3000;
;
;    void f(int A[N][N], int B[N][N], int C[N][N]) {
;      for (int i = 0; i < N; i++) {
;        for (int j = 0; j < N; j++) {
;          A[i][j] = 0;
;          for (int k = 0; k < N; k++) {
;            A[i][j] += B[i][k] * C[k][j];
;          }
;        }
;      }
;    }
;
; CHECK:           // 1st level tiling - Tiles
; CHECK-NEXT:      for (int c0 = 0; c0 <= 93; c0 += 1)
; CHECK-NEXT:        for (int c1 = 0; c1 <= 93; c1 += 1) {
; CHECK-NEXT:          // 1st level tiling - Points
; CHECK-NEXT:          for (int c2 = 0; c2 <= min(31, -32 * c0 + 2999); c2 += 1)
; CHECK-NEXT:            for (int c3 = 0; c3 <= min(31, -32 * c1 + 2999); c3 += 1)
; CHECK-NEXT:              Stmt_for_body3(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT:        }
; CHECK-NEXT:      // Inter iteration alias-free
; CHECK-NEXT:      // Register tiling - Tiles
; CHECK-NEXT:      for (int c0 = 0; c0 <= 23; c0 += 1)
; CHECK-NEXT:        for (int c1 = 0; c1 <= 2999; c1 += 1)
; CHECK-NEXT:          for (int c2 = 0; c2 <= 2999; c2 += 1) {
; CHECK-NEXT:            // Register tiling - Points
; CHECK-NEXT:            {
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 1, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 2, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 3, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 4, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 5, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 6, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 7, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 8, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 9, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 10, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 11, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 12, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 13, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 14, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 15, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 16, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 17, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 18, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 19, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 20, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 21, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 22, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 23, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 24, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 25, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 26, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 27, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 28, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 29, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 30, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 31, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 32, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 33, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 34, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 35, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 36, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 37, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 38, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 39, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 40, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 41, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 42, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 43, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 44, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 45, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 46, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 47, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 48, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 49, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 50, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 51, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 52, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 53, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 54, c2);
; CHECK-NEXT:              Stmt_for_body8(c1, 128 * c0 + 55, c2);
; CHECK-NEXT:              if (c0 <= 22) {
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 56, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 57, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 58, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 59, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 60, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 61, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 62, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 63, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 64, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 65, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 66, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 67, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 68, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 69, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 70, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 71, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 72, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 73, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 74, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 75, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 76, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 77, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 78, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 79, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 80, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 81, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 82, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 83, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 84, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 85, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 86, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 87, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 88, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 89, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 90, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 91, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 92, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 93, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 94, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 95, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 96, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 97, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 98, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 99, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 100, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 101, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 102, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 103, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 104, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 105, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 106, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 107, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 108, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 109, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 110, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 111, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 112, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 113, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 114, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 115, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 116, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 117, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 118, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 119, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 120, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 121, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 122, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 123, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 124, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 125, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 126, c2);
; CHECK-NEXT:                Stmt_for_body8(c1, 128 * c0 + 127, c2);
; CHECK-NEXT:              }
; CHECK-NEXT:            }
; CHECK-NEXT:          }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; IR form of the C kernel f() shown in the header comment: three nested
; counted loops whose induction variables start at 0 and exit once they
; reach 3000. The store of 0 to A happens in %for.body3 and the
; multiply-accumulate into A happens in %for.body8; these are the two
; statements named in the expected AST output above.
define void @f([3000 x i32]* %A, [3000 x i32]* %B, [3000 x i32]* %C) {
entry:
  br label %for.cond

; Outermost loop header (induction variable %indvars.iv4).
for.cond:                                         ; preds = %for.inc24, %entry
  %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc24 ], [ 0, %entry ]
  %exitcond6 = icmp ne i64 %indvars.iv4, 3000
  br i1 %exitcond6, label %for.body, label %for.end26

for.body:                                         ; preds = %for.cond
  br label %for.cond1

; Middle loop header (induction variable %indvars.iv1).
for.cond1:                                        ; preds = %for.inc21, %for.body
  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc21 ], [ 0, %for.body ]
  %exitcond3 = icmp ne i64 %indvars.iv1, 3000
  br i1 %exitcond3, label %for.body3, label %for.end23

; Zero-initialise A[%indvars.iv4][%indvars.iv1] before the reduction loop.
for.body3:                                        ; preds = %for.cond1
  %arrayidx5 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
  store i32 0, i32* %arrayidx5, align 4
  br label %for.cond6

; Innermost loop header (induction variable %indvars.iv).
for.cond6:                                        ; preds = %for.inc, %for.body3
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body3 ]
  %exitcond = icmp ne i64 %indvars.iv, 3000
  br i1 %exitcond, label %for.body8, label %for.end

; Accumulate B[%indvars.iv4][%indvars.iv] * C[%indvars.iv][%indvars.iv1]
; into A[%indvars.iv4][%indvars.iv1].
for.body8:                                        ; preds = %for.cond6
  %arrayidx12 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %indvars.iv4, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx12, align 4
  %arrayidx16 = getelementptr inbounds [3000 x i32], [3000 x i32]* %C, i64 %indvars.iv, i64 %indvars.iv1
  %tmp7 = load i32, i32* %arrayidx16, align 4
  %mul = mul nsw i32 %tmp, %tmp7
  %arrayidx20 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1
  %tmp8 = load i32, i32* %arrayidx20, align 4
  %add = add nsw i32 %tmp8, %mul
  store i32 %add, i32* %arrayidx20, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %for.cond6

for.end:                                          ; preds = %for.cond6
  br label %for.inc21

for.inc21:                                        ; preds = %for.end
  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
  br label %for.cond1

for.end23:                                        ; preds = %for.cond1
  br label %for.inc24

for.inc24:                                        ; preds = %for.end23
  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
  br label %for.cond

for.end26:                                        ; preds = %for.cond
  ret void
}