#!/usr/bin/env python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
    used by pprof.

  Example:
    python app_profiler.py
    python pprof_proto_generator.py
    pprof -text pprof.profile
"""

from __future__ import print_function
import argparse
import os
import os.path
import profile_pb2
import re
import shutil
import subprocess
import sys
import time

from annotate import Addr2Line
from simpleperf_report_lib import *
from utils import *


def load_pprof_profile(filename):
    """Read `filename` as bytes and parse it into a profile_pb2.Profile."""
    profile = profile_pb2.Profile()
    with open(filename, "rb") as f:
        profile.ParseFromString(f.read())
    return profile


def store_pprof_profile(filename, profile):
    """Serialize `profile` and write the bytes to `filename`."""
    with open(filename, 'wb') as f:
        f.write(profile.SerializeToString())


class PprofProfilePrinter(object):
    """Print the content of a profile_pb2.Profile message to stdout.

    Ids stored in the profile (location/mapping/function ids) are resolved
    as `list[id - 1]`, i.e. the printer assumes entries are stored in id
    order starting from 1, which is how PprofProfileGenerator writes them.
    """

    def __init__(self, profile):
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile."""
        p = self.profile
        sub_space = ' '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header added so the function section matches the others.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType as both string-table ids and resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + ' '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            # Prefix and label share a line: show_label ends the line.
            print('%slabel[%d] = ' % (space, i), end='')
            self.show_label(label)

    def show_location_id(self, location_id, space=''):
        """Print the location stored at index `location_id - 1`."""
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print one location together with its mapping and line entries."""
        sub_space = space + ' '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping stored at index `mapping_id - 1`, if any."""
        # Id 0 means "no mapping"; indexing with -1 would print the last
        # mapping (or raise on an empty list).
        if mapping_id <= 0:
            return
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line entry and the function it refers to."""
        sub_space = space + ' '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function stored at index `function_id - 1`, if any."""
        # PprofProfileGenerator.get_function_id() returns 0 for 'unknown'
        # symbols, so 0 must not be resolved to function[-1].
        if function_id <= 0:
            return
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print all fields of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def show_label(self, label, space=''):
        """Print one label as `Label(key =value)`."""
        print('%sLabel(%s =' % (space, self.string(label.key)), end='')
        # String index 0 is the empty string, so a non-zero `str` means the
        # label carries a string value; otherwise print the numeric value.
        # (HasField() is avoided because it rejects proto3 scalar fields.)
        if label.str:
            print('%s)' % self.string(label.str))
        else:
            print('%d)' % label.num)

    def string(self, id):
        """Return the string stored at index `id` of the string table."""
        return self.string_table[id]


class Sample(object):
    """A call stack (list of location ids) with accumulated values."""

    def __init__(self):
        self.location_ids = []
        # Maps value index (see get_sample_type_id) -> accumulated value.
        self.values = {}

    def add_location_id(self, location_id):
        self.location_ids.append(location_id)

    def add_value(self, id, value):
        """Add `value` to the value stored under index `id`."""
        self.values[id] = self.values.get(id, 0) + value

    def add_values(self, values):
        """Accumulate every (index, value) pair of the dict `values`."""
        for value_id, value in values.items():
            self.add_value(value_id, value)

    @property
    def key(self):
        """Samples with the same location id sequence are merged."""
        return tuple(self.location_ids)


class Location(object):
    """An address inside a mapping, with its source line entries."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.id = -1  # unset
        self.mapping_id = mapping_id
        self.address = address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []

    @property
    def key(self):
        return (self.mapping_id, self.address)


class Line(object):
    """A (function id, line number) pair attached to a Location."""

    def __init__(self):
        self.function_id = 0
        self.line = 0


class Mapping(object):
    """A memory range backed by a file, identified by string-table ids."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.id = -1  # unset
        self.memory_start = start
        self.memory_limit = end
        self.file_offset = pgoff
        self.filename_id = filename_id
        self.build_id_id = build_id_id

    @property
    def key(self):
        return (
            self.memory_start,
            self.memory_limit,
            self.file_offset,
            self.filename_id,
            self.build_id_id)


class Function(object):
    """A function, keyed by its name and the dso it belongs to."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.id = -1  # unset
        self.name_id = name_id
        self.dso_name_id = dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        self.source_filename_id = 0
        self.start_line = 0

    @property
    def key(self):
        return (self.name_id, self.dso_name_id)


class PprofProfileGenerator(object):
    """Build a profile_pb2.Profile from the samples returned by ReportLib.

    `config` is a dict. Keys read here: 'binary_cache_dir', 'record_file',
    'kallsyms', 'comm_filters', 'pid_filters', 'tid_filters', 'dso_filters'
    (all optional) and 'addr2line_path' (read by gen_source_lines).
    """

    def __init__(self, config):
        self.config = config
        self.lib = ReportLib()

        if config.get('binary_cache_dir'):
            self.lib.SetSymfs(config['binary_cache_dir'])
        if config.get('record_file'):
            self.lib.SetRecordFile(config['record_file'])
        if config.get('kallsyms'):
            self.lib.SetKallsymsFile(config['kallsyms'])
        # A filter of None means "accept everything".
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

    def gen(self):
        """Read all samples, aggregate them and return the filled profile."""
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is always the empty string.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            # Each event owns two consecutive value slots:
            # sample count, then accumulated period.
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
                sample.add_location_id(location_id)
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                # Filter on the frame's own symbol, not on the leaf symbol.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if the symbol's dso passes the dso filter."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str):
        """Return the string-table index of `str`, adding it if needed.

        Empty (or None) strings map to index 0.
        """
        if not str:
            return 0
        string_id = self.string_table.get(str)
        if string_id is not None:
            return string_id
        # self.string_table does not hold the '' entry at index 0, hence +1.
        string_id = len(self.string_table) + 1
        self.string_table[str] = string_id
        self.profile.string_table.append(str)
        return string_id

    def get_string(self, string_id):
        """Return the string stored at `string_id` in the profile."""
        return self.profile.string_table[string_id]

    def get_sample_type_id(self, name):
        """Return the first of the two value indexes used by event `name`.

        On first use, two sample types are added to the profile:
        'event_<name>_samples' at the returned index and
        'event_<name>_count' at the following one.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (`symbol`'s mapping, `ip`)."""
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        # Default line info only contains the function name
        line = Line()
        line.function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                                symbol.symbol_addr)
        location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the id of the mapping described by the arguments."""
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        # Store the build id without its "0x" prefix.
        if build_id and build_id.startswith("0x"):
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_mapping(self, mapping_id):
        """Return the Mapping with id `mapping_id`, or None for id <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of function `name` in `dso_name`; 0 for 'unknown'."""
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with id `function_id`, or None for id <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing one with the same call stack."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Use Addr2Line to attach source info to locations and functions."""
        # 1. Create Addr2line instance
        addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir'])

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            addr2line.add_addr(dso_name, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            sources = addr2line.get_sources(dso_name, location.vaddr_in_dso)
            # Tolerate an empty/None result, as the function loop below does.
            lines = [self.add_line(source, dso_name) for source in sources or ()
                     if source.file and source.function and source.line]
            if lines:
                # Replace the default line info only when real info exists.
                location.lines = lines

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                sources = addr2line.get_sources(dso_name, function.vaddr_in_dso)
                source = sources[0] if sources else None
                if source and source.file:
                    function.source_filename_id = self.get_string_id(source.file)
                    if source.line:
                        function.start_line = source.line

    def add_line(self, source, dso_name):
        """Return a Line for `source`, registering its function if needed."""
        line = Line()
        function_id = self.get_function_id(source.function, dso_name, 0)
        function = self.get_function(function_id)
        # get_function() returns None when the function is 'unknown' (id 0).
        if function is not None:
            function.source_filename_id = self.get_string_id(source.file)
        line.function_id = function_id
        line.line = source.line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile, one value slot per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Two value slots per event; slots the sample did not touch stay 0.
        values = [0] * (len(self.sample_types) * 2)
        for value_id, value in sample.values.items():
            values[value_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        profile_mapping.has_line_numbers = True
        profile_mapping.has_inline_frames = True

    def gen_profile_location(self, location):
        """Append `location` and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        # The same string is used for both name and system_name.
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line


def main():
    """Print an existing profile (--show) or generate one from the config."""
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs=1, help='print existing profile.pprof')
    # No nargs here: args.config must be a plain string both when the option
    # is given and when the default is used.
    parser.add_argument('--config', default='pprof_proto_generator.config',
                        help='Set config file, default is pprof_proto_generator.config.')
    args = parser.parse_args()
    if args.show:
        profile = load_pprof_profile(args.show[0])
        printer = PprofProfilePrinter(profile)
        printer.show()
        return
    config = load_config(args.config)
    generator = PprofProfileGenerator(config)
    profile = generator.gen()
    store_pprof_profile(config['output_file'], profile)


if __name__ == '__main__':
    main()