1#!/usr/bin/python2.7
2# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import contextlib
7import itertools
8import os
9import os.path
10import re
11import subprocess
12
13import numpy
14
15import stats_utils
16
17
18class Error(Exception):
19    """Module error class."""
20
21class TestFail(Error):
22    """Indicates a test condition failed (as opposed to tool failure)."""
23
24
25@contextlib.contextmanager
26def CleanupFile(path):
27    """Context manager that deletes path on exit."""
28    try:
29        yield
30    finally:
31        os.remove(path)
32
33
34DEVNULL = open('/dev/null', 'w')
35
36
37class Mmap(object):
38    """Represents a memory map, and does the (un)mapping arithmetic."""
39
40    def __init__(self, start, length, pgoff):
41        self.start = start
42        self.length = length
43        self.pgoff = pgoff
44
45    def __repr__(self):
46        return '[%x(%x) @ %x]' % (self.start, self.length, self.pgoff)
47
48    def Map(self, ip):
49        """Turns ip from a virtual mapped address back to a dso address.
50
51        (Frankly I think these are named backwards. This follows the naming
52        convention of perf's struct map.)
53        """
54        # See perf's util/map.h: map__map_ip()
55        return (ip + self.pgoff) - self.start
56
57    def Unmap(self, rip):
58        """Turns ip from a dso address into a virtual mapped address."""
59        # See perf's util/map.h: map__unmap_ip()
60        return self.start + (rip - self.pgoff)
61
62    MMAP_LINE_RE = re.compile(
63            r'(?P<event_ts>\d+) '
64            r'(?P<event_offset>0x[0-9a-fA-F]+|0) '
65            r'[[](?P<event_size>0x[0-9a-fA-F]+|0)[]]: '
66            r'PERF_RECORD_MMAP '
67            r'(?P<pid>-?\d+)/(?P<tid>-?\d+): '
68            r'[[]'
69                r'(?P<start>0x[0-9a-fA-F]+|0)'
70                r'[(](?P<length>0x[0-9a-fA-F]+|0)[)] @ '
71                r'(?P<pgoff>0x[0-9a-fA-F]+|0)'
72            r'[]]: '
73            r'((?P<executable>[rx]) )?'
74            r'(?P<filename>.*)')
75
76    @staticmethod
77    def GetFromPerfData(perf_data_filename, mmap_filename):
78        """Parse perf_data_filename and find how mmap_filename was mapped.
79
80        @param perf_data_filename: perf.data filename.
81        @param mmap_filename: Look for this mmap.
82        @returns: Mmap object representing the map for mmap_filename.
83        """
84        result = None
85        raw_trace_proc = subprocess.Popen(
86                ('perf', 'report', '-D', '-i', perf_data_filename),
87                stdout=subprocess.PIPE, stderr=DEVNULL)
88        for line in raw_trace_proc.stdout:
89            if 'PERF_RECORD_MMAP' not in line:
90                continue
91            match = Mmap.MMAP_LINE_RE.match(line)
92            if not match:
93                raise Error('Unexpected format for MMAP record in raw dump:\n' +
94                            line)
95            if match.group('filename') == mmap_filename:
96                args = match.group('start', 'length', 'pgoff')
97                result = Mmap(*tuple(int(x, 16) for x in args))
98                break
99        for line in raw_trace_proc.stdout:
100            # Skip rest of output
101            pass
102        raw_trace_proc.wait()
103        return result
104
105RAW_EVENT_CODES = {
106    'br_inst_retired.all_branches': 'r4c4',
107}
108
109def TranslateEvents(events):
110    return [RAW_EVENT_CODES.get(e, e) for e in events]
111
112
113# This is the right value for SandyBridge, IvyBridge and Haswell, at least.
114# See Intel manual vol. 3B, 17.4.8 LBR
115# TODO: Consider detecting if 16 is the correct branch buffer length base on the
116# uarch. However, all uarchs we run on have a 16-long buffer.
117BRANCH_BUFFER_LENGTH = 16
118
119def EstimateExpectedSamples(loops, count):
120    """Calculate the number of SAMPLE events expected.
121
122    ie, expect estimate * BRANCH_BUFFER_LENGTH branches to be sampled.
123
124    Incorporates the "observer effect": includes branches caused by returning
125    from PMU interrupts.
126
127    Includes one extra sample due to alignment of samples in the series of
128    branches. This sample can be expected "most" of the time, but it is not
129    incorrect for it to be missing.
130
131    @param loops: the number of noploop branches executed.
132    @param count: the event sampling period. ie, a sample should be collected
133        every count branches.
134    """
135    sample_count = 1 # assume program prolog takes one sample
136
137    all_branches = loops
138    loop_samples = loops/(count-1)
139    while loop_samples >= 1:
140        all_branches += loop_samples
141        # compounding branches caused by samples caused by samples caused ...
142        loop_samples = loop_samples/(count-1)
143
144    sample_count += all_branches / count
145    sample_count += 1  # due to alignment
146    return sample_count
147
148
149def _CountRecordedBranches(perf_data_filename, dso_name, branch_addresses):
150    """Count the branches recorded in perf_data_filename using perf report.
151
152    Count the total number of branches recorded, and also the count recorded
153    at a specific branch.
154
155    @param perf_data_filename: perf data filename
156    @param dso_name: dso that the branch specified by branch_addresses
157        pertains to.
158    @param branch_addresses: pair of (source, target) addresses specifying the
159        branch within dso_name to count.
160    @returns: pair with the the total branches recorded, and the count for
161        the specified branch.
162    """
163    mmap = Mmap.GetFromPerfData(perf_data_filename, dso_name)
164    out = subprocess.check_output(
165            ('perf', 'report', '-i', perf_data_filename, '-nv',
166             '-s', 'dso_from,symbol_from,dso_to,symbol_to'),
167            stderr=DEVNULL)
168    total_sampled_branches = 0
169    branch_samples = 0
170    for line in out.splitlines():
171        if not line or line.startswith('#'):
172            continue
173        record = line.split()
174        samples = int(record[1])
175        dso_from = record[2]
176        raw_from_address = int(record[3], 16)
177        dso_to = record[7]
178        raw_to_address = int(record[8], 16)
179
180        # including non-loop branches
181        total_sampled_branches += samples
182
183        if not (dso_from == dso_to == dso_name):
184            continue
185        from_address = mmap.Map(raw_from_address)
186        to_address = mmap.Map(raw_to_address)
187        if (from_address, to_address) == branch_addresses:
188            branch_samples += samples  # should only match once.
189
190    return total_sampled_branches, branch_samples
191
192
193def GatherPerfBranchSamples(noploop, branch_addresses, events, count,
194                            progress_func=lambda i, j: None):
195    """Run perf record -b with the given events, and noploop program.
196
197    Expects to record the branch specified by branch_addresses.
198
199    @param noploop: Path to noploop binary. It should take one argument (number
200        of loop iterations) and produce no output.
201    @param branch_addresses: pair of branch (source, target) addresses.
202    @param events: Value to pass to '-e' arg of perf stat, which determines when
203        the branch buffer is sampled. ':u' will be appended to each event in
204        order to sample only userspace branches. Some events may be translated
205        to raw event codes if necessary.
206    @param count: Event period to sample.
207    @returns: List of dicts containing facts about the executions of noploop.
208    """
209    events = TranslateEvents(events.split(','))
210    events = ','.join(e + ':u' for e in events)
211    facts = []
212    for i, j in itertools.product(xrange(10), xrange(5)):
213        progress_func(i, j)
214        loops = (i+1) * 10000000  # (i+1) * 10 million
215        fact = {'loops': loops}
216        perf_data = 'perf.lbr.noploop.%d.%d.data' % (loops, j)
217        with CleanupFile(perf_data):
218            subprocess.check_call(
219                    ('perf', 'record', '-o', perf_data,
220                     '-b', '-e', events, '-c', '%d' % count,
221                     noploop, '%d' % loops),
222                    stderr=DEVNULL)
223            noploop_dso_name = os.path.abspath(noploop)
224            total_sampled_branches, branch_samples = _CountRecordedBranches(
225                    perf_data, noploop_dso_name, branch_addresses)
226            fact['branch_count'] = branch_samples
227
228            total_samples = total_sampled_branches / BRANCH_BUFFER_LENGTH
229            total_expected_samples = EstimateExpectedSamples(loops, count)
230            if not (total_samples == total_expected_samples or
231                    total_samples == total_expected_samples - 1):  # alignment
232                raise TestFail('Saw the wrong number of samples: '
233                               'saw %d, expected %d or %d' %
234                               (total_samples,
235                                total_expected_samples,
236                                total_expected_samples - 1))
237
238            if fact['branch_count'] == 0:
239                raise TestFail('No matching branch records found.')
240            facts.append(fact)
241    progress_func(-1, -1)  # Finished
242    return facts
243
244
245def ReadBranchAddressesFile(filename):
246    with open(filename, 'r') as f:
247        branch = tuple(int(x, 16) for x in f.read().split())
248    return branch
249
250
251def main():
252    """Verify the operation of LBR using a simple noploop program and perf."""
253    def _Progress(i, j):
254        if i == -1 and j == -1:  # Finished
255            print
256            return
257        if j == 0:
258            if i != 0:
259                print
260            print i, ':',
261        print j,
262        sys.stdout.flush()
263    branch = ReadBranchAddressesFile('src/noploop_branch.txt')
264    facts = GatherPerfBranchSamples('src/noploop', branch,
265                                    'br_inst_retired.all_branches',
266                                    10000,
267                                    progress_func=_Progress)
268    dt = numpy.dtype([('loops', numpy.int), ('branch_count', numpy.int)])
269    a = stats_utils.FactsToNumpyArray(facts, dt)
270    (slope, intercept), r2 = stats_utils.LinearRegression(
271        a['loops'], a['branch_count'])
272    for f in facts:
273        print f
274    print "slope:", slope
275    print "intercept:", intercept
276    print "r-squared:", r2
277
278if __name__ == '__main__':
279    main()
280