; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
; RUN:   -S < %s | FileCheck %s

; Test for Polly's performance-monitoring code generation.
;
; The function below is run through Polly code generation with performance
; monitoring enabled. The FileCheck patterns further down verify that the
; output contains:
;   - four thread-local cycle-counter / state globals,
;   - an rdtscp read stored at polly.split_new_and_old,
;   - an rdtscp read plus accumulation at polly.merge_new_and_old,
;   - the helper functions @__polly_perf_final and @__polly_perf_init.
;
; C equivalent of the input function:
;
; void f(long A[], long N) {
;   long i;
;   if (true)
;     for (i = 0; i < N; ++i)
;       A[i] = i;
; }

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

; Stores the induction variable into A[i] for i in [0, N).
; NOTE(review): the fences in the entry and return blocks presumably keep
; those blocks outside the region Polly detects -- confirm.
define void @f(i64* %A, i64 %N) nounwind {
entry:
  fence seq_cst
  br label %next

next:
  ; Constant-true guard, mirroring the "if (true)" in the C version.
  br i1 true, label %for.i, label %return

for.i:
  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
  %scevgep = getelementptr i64, i64* %A, i64 %indvar
  store i64 %indvar, i64* %scevgep
  %indvar.next = add nsw i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %N
  br i1 %exitcond, label %return, label %for.i

return:
  fence seq_cst
  ret void
}

; Globals expected from the performance monitor.
; CHECK: @__polly_perf_cycles_total_start = weak thread_local(initialexec) constant i64 0
; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false
; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0
; CHECK-NEXT: @__polly_perf_cycles_in_scop_start = weak thread_local(initialexec) constant i64 0

; At polly.split_new_and_old the current cycle count is stored as the start
; value.
; CHECK: polly.split_new_and_old: ; preds = %entry
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
; CHECK-NEXT: store volatile i64 %1, i64* @__polly_perf_cycles_in_scop_start

; At polly.merge_new_and_old the elapsed cycles (now - start) are added to the
; running in-scops total.
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
; CHECK-NEXT: %6 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
; CHECK-NEXT: %7 = call { i64, i32 } @llvm.x86.rdtscp()
; CHECK-NEXT: %8 = extractvalue { i64, i32 } %7, 0
; CHECK-NEXT: %9 = sub i64 %8, %6
; CHECK-NEXT: %10 = load volatile i64, i64* @__polly_perf_cycles_in_scops
; CHECK-NEXT: %11 = add i64 %10, %9
; CHECK-NEXT: store volatile i64 %11, i64* @__polly_perf_cycles_in_scops


; @__polly_perf_final computes the total elapsed cycles (%3), loads the
; in-scops cycles (%4), and issues four printf calls, each followed by
; fflush(null). The third printf receives %3 and the fourth receives %4.
; CHECK: define weak_odr void @__polly_perf_final() {
; CHECK-NEXT: start:
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
; CHECK-NEXT: %2 = load volatile i64, i64* @__polly_perf_cycles_total_start
; CHECK-NEXT: %3 = sub i64 %1, %2
; CHECK-NEXT: %4 = load volatile i64, i64* @__polly_perf_cycles_in_scops
; CHECK-NEXT: %5 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))
; CHECK-NEXT: %6 = call i32 @fflush(i8* null)
; CHECK-NEXT: %7 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))
; CHECK-NEXT: %8 = call i32 @fflush(i8* null)
; CHECK-NEXT: %9 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))
; CHECK-NEXT: %10 = call i32 @fflush(i8* null)
; CHECK-NEXT: %11 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %4, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
; CHECK-NEXT: %12 = call i32 @fflush(i8* null)


; @__polly_perf_init returns early when the initialized flag is already set.
; Otherwise it sets the flag, registers @__polly_perf_final with atexit, and
; records the current cycle count as the total start value.
; CHECK: define weak_odr void @__polly_perf_init() {
; CHECK-NEXT: start:
; CHECK-NEXT: %0 = load i1, i1* @__polly_perf_initialized
; CHECK-NEXT: br i1 %0, label %earlyreturn, label %initbb

; CHECK: earlyreturn: ; preds = %start
; CHECK-NEXT: ret void

; CHECK: initbb: ; preds = %start
; CHECK-NEXT: store i1 true, i1* @__polly_perf_initialized
; CHECK-NEXT: %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))
; CHECK-NEXT: %2 = call { i64, i32 } @llvm.x86.rdtscp()
; CHECK-NEXT: %3 = extractvalue { i64, i32 } %2, 0
; CHECK-NEXT: store volatile i64 %3, i64* @__polly_perf_cycles_total_start
; CHECK-NEXT: ret void
; CHECK-NEXT: }