#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""


import argparse
import os
import os.path
import shutil
import subprocess
import sys

from simpleperf_report_lib import *
from utils import *


class SourceLine(object):
    """A (file, function, line) source location.

    Inside Addr2Line, `file` is an integer id into Addr2Line.file_list;
    Addr2Line.get_sources() returns instances whose `file` is the file path.
    """
    def __init__(self, file, function, line):
        self.file = file
        self.function = function
        self.line = line

    @property
    def file_key(self):
        """Key identifying the source file."""
        return self.file

    @property
    def function_key(self):
        """Key identifying the function within its source file."""
        return (self.file, self.function)

    @property
    def line_key(self):
        """Key identifying the line within its source file."""
        return (self.file, self.line)


# TODO: using addr2line can't convert from function_start_address to
# source_file:line very well for java code. Because in .debug_line section,
# there is some distance between function_start_address and the address
# of the first instruction which can be mapped to source line.
class Addr2Line(object):
    """collect information of how to map [dso_name,vaddr] to [source_file:line].

    Usage: call add_addr() for every address of interest, then
    convert_addrs_to_lines() once, then get_sources() for lookups.
    """
    def __init__(self, addr2line_path, symfs_dir=None):
        # map from dso_name to {addr: list of SourceLine, or None before
        # conversion}
        self.dso_dict = dict()
        self.addr2line_path = addr2line_path
        self.symfs_dir = symfs_dir

    def add_addr(self, dso_name, addr):
        """Register an address of dso_name to be converted later."""
        self.dso_dict.setdefault(dso_name, dict()).setdefault(addr, None)

    def convert_addrs_to_lines(self):
        """Run addr2line on every registered address of every dso."""
        # file_list stores the source files; file_dict maps a file to its id
        # with file_list[id] == file. Id 0 is reserved for "unknown file".
        self.file_list = ['']
        self.file_dict = {'': 0}

        for dso_name, dso in self.dso_dict.items():
            self._convert_addrs_to_lines(dso_name, dso)
        self._combine_source_files()

    def _convert_addrs_to_lines(self, dso_name, dso):
        """Fill dso (a dict addr -> None) with lists of SourceLine.

        If the binary for dso_name can't be found, a warning is logged and
        dso is emptied.
        """
        dso_path = self._find_dso_path(dso_name)
        if dso_path is None:
            log_warning("can't find dso '%s'" % dso_name)
            dso.clear()
            return
        addrs = sorted(dso)
        addr_str = '\n'.join('0x%x' % addr for addr in addrs)
        # universal_newlines=True makes the pipes text-mode, so passing and
        # receiving str works on both Python 2 and Python 3.
        subproc = subprocess.Popen([self.addr2line_path, '-e', dso_path, '-aifC'],
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                   universal_newlines=True)
        (stdoutdata, _) = subproc.communicate(addr_str)
        stdoutdata = stdoutdata.strip().split('\n')
        if len(stdoutdata) < len(addrs):
            log_fatal("addr2line didn't output enough lines")
        addr_pos = 0
        out_pos = 0
        # The output is parsed as, for each address: one "0x..." address
        # line, then one or more (function line, "file:line" line) pairs
        # (several pairs when the address belongs to inlined functions).
        while addr_pos < len(addrs) and out_pos < len(stdoutdata):
            addr_line = stdoutdata[out_pos]
            out_pos += 1
            assert addr_line[:2] == "0x"
            assert out_pos < len(stdoutdata)
            assert addrs[addr_pos] == int(addr_line, 16)
            source_lines = []
            while out_pos < len(stdoutdata) and stdoutdata[out_pos][:2] != "0x":
                function = stdoutdata[out_pos]
                out_pos += 1
                assert out_pos < len(stdoutdata)
                # Split on the last ':' only, so a file path that itself
                # contains ':' doesn't break the unpacking.
                file_path, line = stdoutdata[out_pos].rsplit(':', 1)
                line = line.split()[0]  # Remove comments after line number
                out_pos += 1
                # '?' marks an unknown file or line; both are stored as 0.
                if '?' in file_path:
                    file_id = 0
                else:
                    file_id = self._get_file_id(file_path)
                line = 0 if '?' in line else int(line)
                source_lines.append(SourceLine(file_id, function, line))
            dso[addrs[addr_pos]] = source_lines
            addr_pos += 1
        assert addr_pos == len(addrs)

    def _get_file_id(self, file):
        """Return the id of file, allocating a new one on first sight."""
        file_id = self.file_dict.get(file)
        if file_id is None:
            file_id = len(self.file_list)
            self.file_list.append(file)
            self.file_dict[file] = file_id
        return file_id

    def _combine_source_files(self):
        """It is possible that addr2line gives us different names for the same
            file, like:
            /usr/local/.../src/main/jni/sudo-game-jni.cpp
            sudo-game-jni.cpp
           We'd better combine these two files. We can do it by combining
           source files with no conflicts in path.
        """
        # Collect files having the same filename.
        filename_dict = dict()
        for file_path in self.file_list:
            index = max(file_path.rfind('/'), file_path.rfind(os.sep))
            filename = file_path[index+1:]
            filename_dict.setdefault(filename, []).append(file_path)

        # Combine files having the same filename and having no conflicts in path.
        for files in filename_dict.values():
            if len(files) == 1:
                continue
            for file_path in files:
                to_file = file_path
                # Test if we can merge file_path with another file having
                # longer path.
                for candidate in files:
                    if len(candidate) > len(to_file) and file_path in candidate:
                        to_file = candidate
                if to_file != file_path:
                    from_id = self.file_dict[file_path]
                    to_id = self.file_dict[to_file]
                    # Redirect the id of the shorter name to the longer path.
                    self.file_list[from_id] = self.file_list[to_id]

    def get_sources(self, dso_name, addr):
        """Return a list of SourceLine (with file paths) for [dso_name, addr].

        Returns an empty list when nothing is known about the address.
        """
        dso = self.dso_dict.get(dso_name)
        if dso is None:
            return []
        # The stored value is None for an address that was never converted.
        item = dso.get(addr) or []
        return [SourceLine(self.file_list[source.file], source.function, source.line)
                for source in item]

    def _find_dso_path(self, dso):
        """Return the host path of the binary for dso, or None if not found.

        The path under symfs_dir (when set) is tried before dso itself.
        """
        if not dso.startswith('/') or dso == '//anon':
            return None
        if self.symfs_dir:
            dso_path = os.path.join(self.symfs_dir, dso[1:])
            if os.path.isfile(dso_path):
                return dso_path
        if os.path.isfile(dso):
            return dso
        return None


class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """
    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        """Add other's counts to this Period in place."""
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod(object):
    """Period for each shared library"""
    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        """Accumulate period into this shared library's total."""
        self.period += period


class FilePeriod(object):
    """Period for each source file"""
    def __init__(self, file):
        self.file = file
        self.period = Period()
        # Period for each line in the file.
        self.line_dict = {}
        # Period for each function in the source file, stored as
        # function_name -> [function_start_line, Period].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into this file's total."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry of the given line number."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            self.line_dict[line] = line_period = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry of function_name.

        The start line is recorded when the function is first seen; a start
        line of None is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if entry is None:
            if function_start_line is None:
                function_start_line = -1
            self.function_dict[function_name] = entry = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator(object):
    """group code for annotating source files"""
    def __init__(self, config):
        """Validate config, set up filters and recreate the output dir.

        config is a dict-like object; every name in config_names below must
        be present, 'kallsyms' is optional. Note that an existing
        'annotate_dest_dir' directory is deleted and recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'symfs_dir', 'source_dirs',
                        'annotate_dest_dir', 'comm_filters', 'pid_filters',
                        'tid_filters', 'dso_filters', 'addr2line_path']
        for name in config_names:
            if name not in config:
                log_fatal('config [%s] is missing' % name)
        symfs_dir = config['symfs_dir']
        if symfs_dir and not os.path.isdir(symfs_dir):
            log_fatal('[symfs_dir] "%s" is not a dir' % symfs_dir)
        # 'kallsyms' is not a required key, so don't index it directly.
        kallsyms = config.get('kallsyms')
        if kallsyms and not os.path.isfile(kallsyms):
            log_fatal('[kallsyms] "%s" is not a file' % kallsyms)
        for source_dir in config['source_dirs']:
            if not os.path.isdir(source_dir):
                log_fatal('[source_dirs] "%s" is not a dir' % source_dir)

        # init member variables
        self.config = config
        self.symfs_dir = config.get('symfs_dir')
        self.kallsyms = config.get('kallsyms')
        # A filter of None means "accept everything".
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['addr2line_path'], symfs_dir)

    def annotate(self):
        """Run the whole pipeline: read samples, write summary and sources."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._collect_source_files()
        self._annotate_files()

    def _read_samples(self, perf_data):
        """Yield (sample, symbols) for each accepted sample in perf_data.

        Samples rejected by _filter_sample() are skipped. symbols[0] is the
        symbol of the sample itself, followed by the symbols of its
        callchain entries. The ReportLib instance is closed when the
        generator finishes.
        NOTE(review): the yielded objects come from ReportLib; consume them
        before advancing the generator, as the inline loops used to do.
        """
        lib = ReportLib()
        try:
            lib.SetRecordFile(perf_data)
            if self.symfs_dir:
                lib.SetSymfs(self.symfs_dir)
            if self.kallsyms:
                lib.SetKallsymsFile(self.kallsyms)
            while True:
                sample = lib.GetNextSample()
                if sample is None:
                    break
                if not self._filter_sample(sample):
                    continue
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)
                yield sample, symbols
        finally:
            lib.Close()

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for perf_data in self.config['perf_data_list']:
            for _, symbols in self._read_samples(perf_data):
                for symbol in symbols:
                    if self._filter_symbol(symbol):
                        # Both the hit address and the start address of the
                        # enclosing symbol are needed later.
                        self.addr2line.add_addr(symbol.dso_name, symbol.vaddr_in_file)
                        self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr)

    def _filter_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if the symbol's dso passes the dso filter."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        """Convert all collected addresses to source lines."""
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        self.period = 0
        self.dso_periods = dict()
        self.file_periods = dict()
        for perf_data in self.config['perf_data_list']:
            for sample, symbols in self._read_samples(perf_data):
                # Each sample has a callchain, but its period is only used once
                # to add period for each function/source_line/source_file/binary.
                # For example, if more than one entry in the callchain hits a
                # function, the event count of that function is only increased once.
                # Otherwise, we may get periods > 100%.
                is_sample_used = False
                used_dso_dict = dict()
                used_file_dict = dict()
                used_function_dict = dict()
                used_line_dict = dict()
                period = Period(sample.period, sample.period)
                for i, symbol in enumerate(symbols):
                    if i == 1:
                        # Callchain entries only contribute accumulated period.
                        period = Period(0, sample.period)
                    if not self._filter_symbol(symbol):
                        continue
                    is_sample_used = True
                    # Add period to dso.
                    self._add_dso_period(symbol.dso_name, period, used_dso_dict)
                    # Add period to source file.
                    sources = self.addr2line.get_sources(symbol.dso_name,
                                                         symbol.vaddr_in_file)
                    for source in sources:
                        if source.file:
                            self._add_file_period(source, period, used_file_dict)
                            # Add period to line.
                            if source.line:
                                self._add_line_period(source, period, used_line_dict)
                    # Add period to function.
                    sources = self.addr2line.get_sources(symbol.dso_name,
                                                         symbol.symbol_addr)
                    for source in sources:
                        if source.file:
                            self._add_file_period(source, period, used_file_dict)
                            if source.function:
                                self._add_function_period(source, period,
                                                          used_function_dict)

                if is_sample_used:
                    self.period += sample.period

    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add period to dso_name once per sample (tracked by used_dso_dict)."""
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add period to source's file once per sample."""
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add period to source's line once per sample.

        The FilePeriod of source.file must already exist, i.e.
        _add_file_period() has been called for this source.
        """
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add period to source's function once per sample.

        The FilePeriod of source.file must already exist, i.e.
        _add_file_period() has been called for this source.
        """
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file into annotate_dest_dir.

        Entries are listed in decreasing order of accumulated period.
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            # key=/reverse=True is stable, so ties keep their original order
            # exactly like the former cmp-based descending sort.
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                values = [(func_name, entry[0], entry[1])
                          for func_name, entry in file_period.function_dict.items()]
                values.sort(key=lambda x: x[2].acc_period, reverse=True)
                for func_name, func_start_line, period in values:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        func_name, func_start_line, self._get_percentage_str(period)))
                f.write('\n')
                for line in sorted(file_period.line_dict):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))

    def _get_percentage_str(self, period, short=False):
        """Format period as percentages of the total period."""
        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)

    def _get_percentage(self, period):
        """Return (acc_period %, period %) relative to the total period.

        Returns (0, 0) when the total period is 0.
        """
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)

    def _collect_source_files(self):
        """Index the source files under source_dirs by file name.

        Builds self.source_file_dict: file name -> list of full paths.
        """
        self.source_file_dict = dict()
        source_file_suffix = ['h', 'c', 'cpp', 'cc', 'java']
        for source_dir in self.config['source_dirs']:
            for root, _, files in os.walk(source_dir):
                for filename in files:
                    if filename[filename.rfind('.')+1:] in source_file_suffix:
                        self.source_file_dict.setdefault(filename, []).append(
                            os.path.join(root, filename))

    def _find_source_file(self, file):
        """Return the collected source path containing `file`, or None.

        When several collected paths match, a warning is logged and the last
        match is returned.
        """
        filename = file[file.rfind(os.sep)+1:]
        source_files = self.source_file_dict.get(filename)
        if source_files is None:
            return None
        match_count = 0
        result = None
        for path in source_files:
            if file in path:
                match_count += 1
                result = path
        if match_count > 1:
            log_warning('multiple source for %s, select %s' % (file, result))
        return result

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for key, file_period in self.file_periods.items():
            is_java = False
            if key.startswith('$JAVA_SRC_ROOT/'):
                path = key[len('$JAVA_SRC_ROOT/'):]
                path = os.sep.join(path.split('/'))
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, 'java', path)
                is_java = True
            elif key.startswith('/') and os.path.isfile(key):
                # The absolute path exists on the host; use it directly.
                path = key
                from_path = path
                to_path = os.path.join(dest_dir, path[1:])
            else:
                path = key[1:] if key.startswith('/') else key
                # Change path on device to path on host
                path = os.sep.join(path.split('/'))
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, path)
            if from_path is None:
                log_warning("can't find source file for path %s" % key)
                continue
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Map from 1-based line number to annotation text. Later assignments
        # win: function annotations override line annotations, and the file
        # annotation always takes line 1.
        annotates = dict()
        for line, period in file_period.line_dict.items():
            annotates[line] = self._get_percentage_str(period, True)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # For java files the annotation goes one line above the recorded
            # start line.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # 6 == len('/* ') + len(' */'), so unannotated lines stay aligned.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        dirname = os.path.dirname(to_path)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(to_path, 'w') as wf:
            for line_number, content in enumerate(lines, 1):
                annotate = annotates.get(line_number)
                if annotate is None:
                    annotate = empty_annotate
                else:
                    annotate = '/* ' + annotate + (
                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
                wf.write(annotate)
                wf.write(content)


def main():
    """Parse the command line, load the config and annotate source files."""
    parser = argparse.ArgumentParser(
        description='Annotate based on perf.data. See configurations in annotate.config.')
    parser.add_argument('--config', default='annotate.config',
                        help='Set configuration file. Default is annotate.config.')
    args = parser.parse_args()
    config = load_config(args.config)
    annotator = SourceFileAnnotator(config)
    annotator.annotate()


if __name__ == '__main__':
    main()