1#!/usr/bin/python2.7 2# Copyright (c) 2014 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6import contextlib 7import itertools 8import os 9import os.path 10import re 11import subprocess 12 13import numpy 14 15import stats_utils 16 17 18class Error(Exception): 19 """Module error class.""" 20 21class TestFail(Error): 22 """Indicates a test condition failed (as opposed to tool failure).""" 23 24 25@contextlib.contextmanager 26def CleanupFile(path): 27 """Context manager that deletes path on exit.""" 28 try: 29 yield 30 finally: 31 os.remove(path) 32 33 34DEVNULL = open('/dev/null', 'w') 35 36 37class Mmap(object): 38 """Represents a memory map, and does the (un)mapping arithmetic.""" 39 40 def __init__(self, start, length, pgoff): 41 self.start = start 42 self.length = length 43 self.pgoff = pgoff 44 45 def __repr__(self): 46 return '[%x(%x) @ %x]' % (self.start, self.length, self.pgoff) 47 48 def Map(self, ip): 49 """Turns ip from a virtual mapped address back to a dso address. 50 51 (Frankly I think these are named backwards. This follows the naming 52 convention of perf's struct map.) 53 """ 54 # See perf's util/map.h: map__map_ip() 55 return (ip + self.pgoff) - self.start 56 57 def Unmap(self, rip): 58 """Turns ip from a dso address into a virtual mapped address.""" 59 # See perf's util/map.h: map__unmap_ip() 60 return self.start + (rip - self.pgoff) 61 62 MMAP_LINE_RE = re.compile( 63 r'(?P<event_ts>\d+) ' 64 r'(?P<event_offset>0x[0-9a-fA-F]+|0) ' 65 r'[[](?P<event_size>0x[0-9a-fA-F]+|0)[]]: ' 66 r'PERF_RECORD_MMAP ' 67 r'(?P<pid>-?\d+)/(?P<tid>-?\d+): ' 68 r'[[]' 69 r'(?P<start>0x[0-9a-fA-F]+|0)' 70 r'[(](?P<length>0x[0-9a-fA-F]+|0)[)] @ ' 71 r'(?P<pgoff>0x[0-9a-fA-F]+|0)' 72 r'[]]: ' 73 r'((?P<executable>[rx]) )?' 74 r'(?P<filename>.*)') 75 76 @staticmethod 77 def GetFromPerfData(perf_data_filename, mmap_filename): 78 """Parse perf_data_filename and find how mmap_filename was mapped. 79 80 @param perf_data_filename: perf.data filename. 81 @param mmap_filename: Look for this mmap. 82 @returns: Mmap object representing the map for mmap_filename. 83 """ 84 result = None 85 raw_trace_proc = subprocess.Popen( 86 ('perf', 'report', '-D', '-i', perf_data_filename), 87 stdout=subprocess.PIPE, stderr=DEVNULL) 88 for line in raw_trace_proc.stdout: 89 if 'PERF_RECORD_MMAP' not in line: 90 continue 91 match = Mmap.MMAP_LINE_RE.match(line) 92 if not match: 93 raise Error('Unexpected format for MMAP record in raw dump:\n' + 94 line) 95 if match.group('filename') == mmap_filename: 96 args = match.group('start', 'length', 'pgoff') 97 result = Mmap(*tuple(int(x, 16) for x in args)) 98 break 99 for line in raw_trace_proc.stdout: 100 # Skip rest of output 101 pass 102 raw_trace_proc.wait() 103 return result 104 105RAW_EVENT_CODES = { 106 'br_inst_retired.all_branches': 'r4c4', 107} 108 109def TranslateEvents(events): 110 return [RAW_EVENT_CODES.get(e, e) for e in events] 111 112 113# This is the right value for SandyBridge, IvyBridge and Haswell, at least. 114# See Intel manual vol. 3B, 17.4.8 LBR 115# TODO: Consider detecting if 16 is the correct branch buffer length base on the 116# uarch. However, all uarchs we run on have a 16-long buffer. 117BRANCH_BUFFER_LENGTH = 16 118 119def EstimateExpectedSamples(loops, count): 120 """Calculate the number of SAMPLE events expected. 121 122 ie, expect estimate * BRANCH_BUFFER_LENGTH branches to be sampled. 123 124 Incorporates the "observer effect": includes branches caused by returning 125 from PMU interrupts. 126 127 Includes one extra sample due to alignment of samples in the series of 128 branches. This sample can be expected "most" of the time, but it is not 129 incorrect for it to be missing. 130 131 @param loops: the number of noploop branches executed. 132 @param count: the event sampling period. ie, a sample should be collected 133 every count branches. 134 """ 135 sample_count = 1 # assume program prolog takes one sample 136 137 all_branches = loops 138 loop_samples = loops/(count-1) 139 while loop_samples >= 1: 140 all_branches += loop_samples 141 # compounding branches caused by samples caused by samples caused ... 142 loop_samples = loop_samples/(count-1) 143 144 sample_count += all_branches / count 145 sample_count += 1 # due to alignment 146 return sample_count 147 148 149def _CountRecordedBranches(perf_data_filename, dso_name, branch_addresses): 150 """Count the branches recorded in perf_data_filename using perf report. 151 152 Count the total number of branches recorded, and also the count recorded 153 at a specific branch. 154 155 @param perf_data_filename: perf data filename 156 @param dso_name: dso that the branch specified by branch_addresses 157 pertains to. 158 @param branch_addresses: pair of (source, target) addresses specifying the 159 branch within dso_name to count. 160 @returns: pair with the the total branches recorded, and the count for 161 the specified branch. 162 """ 163 mmap = Mmap.GetFromPerfData(perf_data_filename, dso_name) 164 out = subprocess.check_output( 165 ('perf', 'report', '-i', perf_data_filename, '-nv', 166 '-s', 'dso_from,symbol_from,dso_to,symbol_to'), 167 stderr=DEVNULL) 168 total_sampled_branches = 0 169 branch_samples = 0 170 for line in out.splitlines(): 171 if not line or line.startswith('#'): 172 continue 173 record = line.split() 174 samples = int(record[1]) 175 dso_from = record[2] 176 raw_from_address = int(record[3], 16) 177 dso_to = record[7] 178 raw_to_address = int(record[8], 16) 179 180 # including non-loop branches 181 total_sampled_branches += samples 182 183 if not (dso_from == dso_to == dso_name): 184 continue 185 from_address = mmap.Map(raw_from_address) 186 to_address = mmap.Map(raw_to_address) 187 if (from_address, to_address) == branch_addresses: 188 branch_samples += samples # should only match once. 189 190 return total_sampled_branches, branch_samples 191 192 193def GatherPerfBranchSamples(noploop, branch_addresses, events, count, 194 progress_func=lambda i, j: None): 195 """Run perf record -b with the given events, and noploop program. 196 197 Expects to record the branch specified by branch_addresses. 198 199 @param noploop: Path to noploop binary. It should take one argument (number 200 of loop iterations) and produce no output. 201 @param branch_addresses: pair of branch (source, target) addresses. 202 @param events: Value to pass to '-e' arg of perf stat, which determines when 203 the branch buffer is sampled. ':u' will be appended to each event in 204 order to sample only userspace branches. Some events may be translated 205 to raw event codes if necessary. 206 @param count: Event period to sample. 207 @returns: List of dicts containing facts about the executions of noploop. 208 """ 209 events = TranslateEvents(events.split(',')) 210 events = ','.join(e + ':u' for e in events) 211 facts = [] 212 for i, j in itertools.product(xrange(10), xrange(5)): 213 progress_func(i, j) 214 loops = (i+1) * 10000000 # (i+1) * 10 million 215 fact = {'loops': loops} 216 perf_data = 'perf.lbr.noploop.%d.%d.data' % (loops, j) 217 with CleanupFile(perf_data): 218 subprocess.check_call( 219 ('perf', 'record', '-o', perf_data, 220 '-b', '-e', events, '-c', '%d' % count, 221 noploop, '%d' % loops), 222 stderr=DEVNULL) 223 noploop_dso_name = os.path.abspath(noploop) 224 total_sampled_branches, branch_samples = _CountRecordedBranches( 225 perf_data, noploop_dso_name, branch_addresses) 226 fact['branch_count'] = branch_samples 227 228 total_samples = total_sampled_branches / BRANCH_BUFFER_LENGTH 229 total_expected_samples = EstimateExpectedSamples(loops, count) 230 if not (total_samples == total_expected_samples or 231 total_samples == total_expected_samples - 1): # alignment 232 raise TestFail('Saw the wrong number of samples: ' 233 'saw %d, expected %d or %d' % 234 (total_samples, 235 total_expected_samples, 236 total_expected_samples - 1)) 237 238 if fact['branch_count'] == 0: 239 raise TestFail('No matching branch records found.') 240 facts.append(fact) 241 progress_func(-1, -1) # Finished 242 return facts 243 244 245def ReadBranchAddressesFile(filename): 246 with open(filename, 'r') as f: 247 branch = tuple(int(x, 16) for x in f.read().split()) 248 return branch 249 250 251def main(): 252 """Verify the operation of LBR using a simple noploop program and perf.""" 253 def _Progress(i, j): 254 if i == -1 and j == -1: # Finished 255 print 256 return 257 if j == 0: 258 if i != 0: 259 print 260 print i, ':', 261 print j, 262 sys.stdout.flush() 263 branch = ReadBranchAddressesFile('src/noploop_branch.txt') 264 facts = GatherPerfBranchSamples('src/noploop', branch, 265 'br_inst_retired.all_branches', 266 10000, 267 progress_func=_Progress) 268 dt = numpy.dtype([('loops', numpy.int), ('branch_count', numpy.int)]) 269 a = stats_utils.FactsToNumpyArray(facts, dt) 270 (slope, intercept), r2 = stats_utils.LinearRegression( 271 a['loops'], a['branch_count']) 272 for f in facts: 273 print f 274 print "slope:", slope 275 print "intercept:", intercept 276 print "r-squared:", r2 277 278if __name__ == '__main__': 279 main() 280