1# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
3
4# The load forms of movss/movsd explicitly zero the upper bits of the destination xmm
5# register, so addps can start once the load completes, without waiting for sqrtss/sqrtsd.
6# See the AMD SOG for AMD Family 15h Processors, section 5.5 "Partial-Register Writes".
7
8# LLVM-MCA-BEGIN
9sqrtss %xmm0, %xmm0    # long-latency scalar op that reads and writes xmm0
10movss  (%eax), %xmm0    # memory load that overwrites xmm0, upper bits zeroed
11addps  %xmm0, %xmm0    # full-width consumer of the xmm0 value written by the load
12# LLVM-MCA-END
13
14# LLVM-MCA-BEGIN
15sqrtsd %xmm0, %xmm0    # long-latency scalar op that reads and writes xmm0
16movsd  (%eax), %xmm0    # memory load that overwrites xmm0, upper bits zeroed
17addps  %xmm0, %xmm0    # full-width consumer of the xmm0 value written by the load
18# LLVM-MCA-END
19
20# CHECK:      [0] Code Region
21
22# CHECK:      Iterations:        100
23# CHECK-NEXT: Instructions:      300
24# CHECK-NEXT: Total Cycles:      655
25# CHECK-NEXT: Total uOps:        300
26
27# CHECK:      Dispatch Width:    4
28# CHECK-NEXT: uOps Per Cycle:    0.46
29# CHECK-NEXT: IPC:               0.46
30# CHECK-NEXT: Block RThroughput: 6.5
31
32# CHECK:      Instruction Info:
33# CHECK-NEXT: [1]: #uOps
34# CHECK-NEXT: [2]: Latency
35# CHECK-NEXT: [3]: RThroughput
36# CHECK-NEXT: [4]: MayLoad
37# CHECK-NEXT: [5]: MayStore
38# CHECK-NEXT: [6]: HasSideEffects (U)
39
40# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
41# CHECK-NEXT:  1      9     4.50                        sqrtss	%xmm0, %xmm0
42# CHECK-NEXT:  1      5     1.50    *                   movss	(%eax), %xmm0
43# CHECK-NEXT:  1      5     1.00                        addps	%xmm0, %xmm0
44
45# CHECK:      Timeline view:
46# CHECK-NEXT:                     0123456789
47# CHECK-NEXT: Index     0123456789          012
48
49# CHECK:      [0,0]     D=eeeeeeeeeER  .    . .   sqrtss	%xmm0, %xmm0
50# CHECK-NEXT: [0,1]     DeeeeeE-----R  .    . .   movss	(%eax), %xmm0
51# CHECK-NEXT: [0,2]     D======eeeeeER .    . .   addps	%xmm0, %xmm0
52# CHECK-NEXT: [1,0]     D===========eeeeeeeeeER   sqrtss	%xmm0, %xmm0
53# CHECK-NEXT: [1,1]     .D==eeeeeE------------R   movss	(%eax), %xmm0
54# CHECK-NEXT: [1,2]     .D=========eeeeeE-----R   addps	%xmm0, %xmm0
55
56# CHECK:      Average Wait times (based on the timeline view):
57# CHECK-NEXT: [0]: Executions
58# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
59# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
60# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
61
62# CHECK:            [0]    [1]    [2]    [3]
63# CHECK-NEXT: 0.     2     7.0    1.0    0.0       sqrtss	%xmm0, %xmm0
64# CHECK-NEXT: 1.     2     2.0    2.0    8.5       movss	(%eax), %xmm0
65# CHECK-NEXT: 2.     2     8.5    1.5    2.5       addps	%xmm0, %xmm0
66# CHECK-NEXT:        2     5.8    1.5    3.7       <total>
67
68# CHECK:      [1] Code Region
69
70# CHECK:      Iterations:        100
71# CHECK-NEXT: Instructions:      300
72# CHECK-NEXT: Total Cycles:      655
73# CHECK-NEXT: Total uOps:        300
74
75# CHECK:      Dispatch Width:    4
76# CHECK-NEXT: uOps Per Cycle:    0.46
77# CHECK-NEXT: IPC:               0.46
78# CHECK-NEXT: Block RThroughput: 6.5
79
80# CHECK:      Instruction Info:
81# CHECK-NEXT: [1]: #uOps
82# CHECK-NEXT: [2]: Latency
83# CHECK-NEXT: [3]: RThroughput
84# CHECK-NEXT: [4]: MayLoad
85# CHECK-NEXT: [5]: MayStore
86# CHECK-NEXT: [6]: HasSideEffects (U)
87
88# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
89# CHECK-NEXT:  1      9     4.50                        sqrtsd	%xmm0, %xmm0
90# CHECK-NEXT:  1      5     1.50    *                   movsd	(%eax), %xmm0
91# CHECK-NEXT:  1      5     1.00                        addps	%xmm0, %xmm0
92
93# CHECK:      Timeline view:
94# CHECK-NEXT:                     0123456789
95# CHECK-NEXT: Index     0123456789          012
96
97# CHECK:      [0,0]     D=eeeeeeeeeER  .    . .   sqrtsd	%xmm0, %xmm0
98# CHECK-NEXT: [0,1]     DeeeeeE-----R  .    . .   movsd	(%eax), %xmm0
99# CHECK-NEXT: [0,2]     D======eeeeeER .    . .   addps	%xmm0, %xmm0
100# CHECK-NEXT: [1,0]     D===========eeeeeeeeeER   sqrtsd	%xmm0, %xmm0
101# CHECK-NEXT: [1,1]     .D==eeeeeE------------R   movsd	(%eax), %xmm0
102# CHECK-NEXT: [1,2]     .D=========eeeeeE-----R   addps	%xmm0, %xmm0
103
104# CHECK:      Average Wait times (based on the timeline view):
105# CHECK-NEXT: [0]: Executions
106# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
107# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
108# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
109
110# CHECK:            [0]    [1]    [2]    [3]
111# CHECK-NEXT: 0.     2     7.0    1.0    0.0       sqrtsd	%xmm0, %xmm0
112# CHECK-NEXT: 1.     2     2.0    2.0    8.5       movsd	(%eax), %xmm0
113# CHECK-NEXT: 2.     2     8.5    1.5    2.5       addps	%xmm0, %xmm0
114# CHECK-NEXT:        2     5.8    1.5    3.7       <total>
115