1; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
2; RUN:   -S < %s | FileCheck %s
3
4; void f(long A[], long N) {
5;   long i;
6;   if (true)
7;     for (i = 0; i < N; ++i)
8;       A[i] = i;
9; }
10
11target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
12target triple = "x86_64-unknown-linux-gnu"
13
; Test input function: IR form of the C sketch f(long A[], long N) shown in the
; comment near the top of the file. It contains a single counted loop that
; stores the induction variable into A, reached through an always-true branch.
;   %A - base pointer of the i64 array written by the loop
;   %N - value the incremented induction variable is compared against to exit
14define void @f(i64* %A, i64 %N) nounwind {
15entry:
  ; NOTE(review): the fence here (and the one in %return) presumably keeps these
  ; blocks outside the region Polly optimizes, so the loop is bracketed by
  ; non-SCoP code - confirm against Polly's SCoP detection rules.
16  fence seq_cst
17  br label %next
18
19next:
  ; Constant-true condition, mirroring the `if (true)` of the C sketch; the
  ; false edge goes straight to %return.
20  br i1 true, label %for.i, label %return
21
22for.i:
  ; %indvar starts at 0 when entered from %next and takes %indvar.next on the
  ; back edge.
23  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
  ; A[indvar] = indvar
24  %scevgep = getelementptr i64, i64* %A, i64 %indvar
25  store i64 %indvar, i64* %scevgep
26  %indvar.next = add nsw i64 %indvar, 1
  ; The exit test runs after the store and checks equality with %N, so the body
  ; executes at least once before the loop can leave.
27  %exitcond = icmp eq i64 %indvar.next, %N
28  br i1 %exitcond, label %return, label %for.i
29
30return:
31  fence seq_cst
32  ret void
33}
34
35; CHECK:      @__polly_perf_cycles_total_start = weak thread_local(initialexec) constant i64 0
36; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false
37; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0
38; CHECK-NEXT: @__polly_perf_cycles_in_scop_start = weak thread_local(initialexec) constant i64 0
39
40; CHECK:      polly.split_new_and_old:                          ; preds = %entry
41; CHECK-NEXT:   %0 = call { i64, i32 } @llvm.x86.rdtscp()
42; CHECK-NEXT:   %1 = extractvalue { i64, i32 } %0, 0
43; CHECK-NEXT:   store volatile i64 %1, i64* @__polly_perf_cycles_in_scop_start
44
45; CHECK:      polly.merge_new_and_old:                          ; preds = %polly.exiting, %return.region_exiting
46; CHECK-NEXT:   %6 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
47; CHECK-NEXT:   %7 = call { i64, i32 } @llvm.x86.rdtscp()
48; CHECK-NEXT:   %8 = extractvalue { i64, i32 } %7, 0
49; CHECK-NEXT:   %9 = sub i64 %8, %6
50; CHECK-NEXT:   %10 = load volatile i64, i64* @__polly_perf_cycles_in_scops
51; CHECK-NEXT:   %11 = add i64 %10, %9
52; CHECK-NEXT:   store volatile i64 %11, i64* @__polly_perf_cycles_in_scops
53
54
55; CHECK:      define weak_odr void @__polly_perf_final() {
56; CHECK-NEXT: start:
57; CHECK-NEXT:   %0 = call { i64, i32 } @llvm.x86.rdtscp()
58; CHECK-NEXT:   %1 = extractvalue { i64, i32 } %0, 0
59; CHECK-NEXT:   %2 = load volatile i64, i64* @__polly_perf_cycles_total_start
60; CHECK-NEXT:   %3 = sub i64 %1, %2
61; CHECK-NEXT:   %4 = load volatile i64, i64* @__polly_perf_cycles_in_scops
62; CHECK-NEXT:   %5 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))
63; CHECK-NEXT:   %6 = call i32 @fflush(i8* null)
64; CHECK-NEXT:   %7 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))
65; CHECK-NEXT:   %8 = call i32 @fflush(i8* null)
66; CHECK-NEXT:   %9 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))
67; CHECK-NEXT:   %10 = call i32 @fflush(i8* null)
68; CHECK-NEXT:   %11 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %4, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
69; CHECK-NEXT:   %12 = call i32 @fflush(i8* null)
70
71
72; CHECK:      define weak_odr void @__polly_perf_init() {
73; CHECK-NEXT: start:
74; CHECK-NEXT:   %0 = load i1, i1* @__polly_perf_initialized
75; CHECK-NEXT:   br i1 %0, label %earlyreturn, label %initbb
76
77; CHECK:      earlyreturn:                                      ; preds = %start
78; CHECK-NEXT:   ret void
79
80; CHECK:      initbb:                                           ; preds = %start
81; CHECK-NEXT:   store i1 true, i1* @__polly_perf_initialized
82; CHECK-NEXT:   %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))
83; CHECK-NEXT:   %2 = call { i64, i32 } @llvm.x86.rdtscp()
84; CHECK-NEXT:   %3 = extractvalue { i64, i32 } %2, 0
85; CHECK-NEXT:   store volatile i64 %3, i64* @__polly_perf_cycles_total_start
86; CHECK-NEXT:   ret void
87; CHECK-NEXT: }
88