1#!/usr/bin/env python 2# 3# Copyright 2012 the V8 project authors. All rights reserved. 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: 7# 8# * Redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer. 10# * Redistributions in binary form must reproduce the above 11# copyright notice, this list of conditions and the following 12# disclaimer in the documentation and/or other materials provided 13# with the distribution. 14# * Neither the name of Google Inc. nor the names of its 15# contributors may be used to endorse or promote products derived 16# from this software without specific prior written permission. 17# 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30import bisect 31import collections 32import ctypes 33import disasm 34import mmap 35import optparse 36import os 37import re 38import subprocess 39import sys 40import time 41 42 43USAGE="""usage: %prog [OPTION]... 44 45Analyses V8 and perf logs to produce profiles. 46 47Perf logs can be collected using a command like: 48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof 49 # -R: collect all data 50 # -e cycles: use cpu-cycles event (run "perf list" for details) 51 # -c 10000: write a sample after each 10000 events 52 # -f: force output file overwrite 53 # -i: limit profiling to our process and the kernel 54 # --ll-prof shell flag enables the right V8 logs 55This will produce a binary trace file (perf.data) that %prog can analyse. 56 57IMPORTANT: 58 The kernel has an internal maximum for events per second, it is 100K by 59 default. That's not enough for "-c 10000". Set it to some higher value: 60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate 61 You can also make the warning about kernel address maps go away: 62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict 63 64We have a convenience script that handles all of the above for you: 65 $ tools/run-llprof.sh ./d8 bench.js 66 67Examples: 68 # Print flat profile with annotated disassembly for the 10 top 69 # symbols. Use default log names and include the snapshot log. 70 $ %prog --snapshot --disasm-top=10 71 72 # Print flat profile with annotated disassembly for all used symbols. 73 # Use default log names and include kernel symbols into analysis. 74 $ %prog --disasm-all --kernel 75 76 # Print flat profile. Use custom log names. 77 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot 78""" 79 80 81JS_ORIGIN = "js" 82JS_SNAPSHOT_ORIGIN = "js-snapshot" 83 84class Code(object): 85 """Code object.""" 86 87 _id = 0 88 UNKNOWN = 0 89 V8INTERNAL = 1 90 FULL_CODEGEN = 2 91 OPTIMIZED = 3 92 93 def __init__(self, name, start_address, end_address, origin, origin_offset): 94 self.id = Code._id 95 Code._id += 1 96 self.name = name 97 self.other_names = None 98 self.start_address = start_address 99 self.end_address = end_address 100 self.origin = origin 101 self.origin_offset = origin_offset 102 self.self_ticks = 0 103 self.self_ticks_map = None 104 self.callee_ticks = None 105 if name.startswith("LazyCompile:*"): 106 self.codetype = Code.OPTIMIZED 107 elif name.startswith("LazyCompile:"): 108 self.codetype = Code.FULL_CODEGEN 109 elif name.startswith("v8::internal::"): 110 self.codetype = Code.V8INTERNAL 111 else: 112 self.codetype = Code.UNKNOWN 113 114 def AddName(self, name): 115 assert self.name != name 116 if self.other_names is None: 117 self.other_names = [name] 118 return 119 if not name in self.other_names: 120 self.other_names.append(name) 121 122 def FullName(self): 123 if self.other_names is None: 124 return self.name 125 self.other_names.sort() 126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) 127 128 def IsUsed(self): 129 return self.self_ticks > 0 or self.callee_ticks is not None 130 131 def Tick(self, pc): 132 self.self_ticks += 1 133 if self.self_ticks_map is None: 134 self.self_ticks_map = collections.defaultdict(lambda: 0) 135 offset = pc - self.start_address 136 self.self_ticks_map[offset] += 1 137 138 def CalleeTick(self, callee): 139 if self.callee_ticks is None: 140 self.callee_ticks = collections.defaultdict(lambda: 0) 141 self.callee_ticks[callee] += 1 142 143 def PrintAnnotated(self, arch, options): 144 if self.self_ticks_map is None: 145 ticks_map = [] 146 else: 147 ticks_map = self.self_ticks_map.items() 148 # Convert the ticks map to offsets and counts arrays so that later 149 # we can do binary search in the offsets array. 150 ticks_map.sort(key=lambda t: t[0]) 151 ticks_offsets = [t[0] for t in ticks_map] 152 ticks_counts = [t[1] for t in ticks_map] 153 # Get a list of disassembled lines and their addresses. 154 lines = self._GetDisasmLines(arch, options) 155 if len(lines) == 0: 156 return 157 # Print annotated lines. 158 address = lines[0][0] 159 total_count = 0 160 for i in xrange(len(lines)): 161 start_offset = lines[i][0] - address 162 if i == len(lines) - 1: 163 end_offset = self.end_address - self.start_address 164 else: 165 end_offset = lines[i + 1][0] - address 166 # Ticks (reported pc values) are not always precise, i.e. not 167 # necessarily point at instruction starts. So we have to search 168 # for ticks that touch the current instruction line. 169 j = bisect.bisect_left(ticks_offsets, end_offset) 170 count = 0 171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): 172 if offset < start_offset: 173 break 174 count += cnt 175 total_count += count 176 count = 100.0 * count / self.self_ticks 177 if count >= 0.01: 178 print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1]) 179 else: 180 print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1]) 181 print 182 assert total_count == self.self_ticks, \ 183 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) 184 185 def __str__(self): 186 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( 187 self.name, 188 self.start_address, 189 self.end_address, 190 self.end_address - self.start_address, 191 self.origin) 192 193 def _GetDisasmLines(self, arch, options): 194 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN: 195 inplace = False 196 filename = options.log + ".ll" 197 else: 198 inplace = True 199 filename = self.origin 200 return disasm.GetDisasmLines(filename, 201 self.origin_offset, 202 self.end_address - self.start_address, 203 arch, 204 inplace) 205 206 207class CodePage(object): 208 """Group of adjacent code objects.""" 209 210 SHIFT = 20 # 1M pages 211 SIZE = (1 << SHIFT) 212 MASK = ~(SIZE - 1) 213 214 @staticmethod 215 def PageAddress(address): 216 return address & CodePage.MASK 217 218 @staticmethod 219 def PageId(address): 220 return address >> CodePage.SHIFT 221 222 @staticmethod 223 def PageAddressFromId(id): 224 return id << CodePage.SHIFT 225 226 def __init__(self, address): 227 self.address = address 228 self.code_objects = [] 229 230 def Add(self, code): 231 self.code_objects.append(code) 232 233 def Remove(self, code): 234 self.code_objects.remove(code) 235 236 def Find(self, pc): 237 code_objects = self.code_objects 238 for i, code in enumerate(code_objects): 239 if code.start_address <= pc < code.end_address: 240 code_objects[0], code_objects[i] = code, code_objects[0] 241 return code 242 return None 243 244 def __iter__(self): 245 return self.code_objects.__iter__() 246 247 248class CodeMap(object): 249 """Code object map.""" 250 251 def __init__(self): 252 self.pages = {} 253 self.min_address = 1 << 64 254 self.max_address = -1 255 256 def Add(self, code, max_pages=-1): 257 page_id = CodePage.PageId(code.start_address) 258 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 259 pages = 0 260 while page_id < limit_id: 261 if max_pages >= 0 and pages > max_pages: 262 print >>sys.stderr, \ 263 "Warning: page limit (%d) reached for %s [%s]" % ( 264 max_pages, code.name, code.origin) 265 break 266 if page_id in self.pages: 267 page = self.pages[page_id] 268 else: 269 page = CodePage(CodePage.PageAddressFromId(page_id)) 270 self.pages[page_id] = page 271 page.Add(code) 272 page_id += 1 273 pages += 1 274 self.min_address = min(self.min_address, code.start_address) 275 self.max_address = max(self.max_address, code.end_address) 276 277 def Remove(self, code): 278 page_id = CodePage.PageId(code.start_address) 279 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 280 removed = False 281 while page_id < limit_id: 282 if page_id not in self.pages: 283 page_id += 1 284 continue 285 page = self.pages[page_id] 286 page.Remove(code) 287 removed = True 288 page_id += 1 289 return removed 290 291 def AllCode(self): 292 for page in self.pages.itervalues(): 293 for code in page: 294 if CodePage.PageAddress(code.start_address) == page.address: 295 yield code 296 297 def UsedCode(self): 298 for code in self.AllCode(): 299 if code.IsUsed(): 300 yield code 301 302 def Print(self): 303 for code in self.AllCode(): 304 print code 305 306 def Find(self, pc): 307 if pc < self.min_address or pc >= self.max_address: 308 return None 309 page_id = CodePage.PageId(pc) 310 if page_id not in self.pages: 311 return None 312 return self.pages[page_id].Find(pc) 313 314 315class CodeInfo(object): 316 """Generic info about generated code objects.""" 317 318 def __init__(self, arch, header_size): 319 self.arch = arch 320 self.header_size = header_size 321 322 323class SnapshotLogReader(object): 324 """V8 snapshot log reader.""" 325 326 _SNAPSHOT_CODE_NAME_RE = re.compile( 327 r"snapshot-code-name,(\d+),\"(.*)\"") 328 329 def __init__(self, log_name): 330 self.log_name = log_name 331 332 def ReadNameMap(self): 333 log = open(self.log_name, "r") 334 try: 335 snapshot_pos_to_name = {} 336 for line in log: 337 match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line) 338 if match: 339 pos = int(match.group(1)) 340 name = match.group(2) 341 snapshot_pos_to_name[pos] = name 342 finally: 343 log.close() 344 return snapshot_pos_to_name 345 346 347class LogReader(object): 348 """V8 low-level (binary) log reader.""" 349 350 _ARCH_TO_POINTER_TYPE_MAP = { 351 "ia32": ctypes.c_uint32, 352 "arm": ctypes.c_uint32, 353 "mips": ctypes.c_uint32, 354 "x64": ctypes.c_uint64, 355 "arm64": ctypes.c_uint64 356 } 357 358 _CODE_CREATE_TAG = "C" 359 _CODE_MOVE_TAG = "M" 360 _CODE_DELETE_TAG = "D" 361 _SNAPSHOT_POSITION_TAG = "P" 362 _CODE_MOVING_GC_TAG = "G" 363 364 def __init__(self, log_name, code_map, snapshot_pos_to_name): 365 self.log_file = open(log_name, "r") 366 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) 367 self.log_pos = 0 368 self.code_map = code_map 369 self.snapshot_pos_to_name = snapshot_pos_to_name 370 self.address_to_snapshot_name = {} 371 372 self.arch = self.log[:self.log.find("\0")] 373 self.log_pos += len(self.arch) + 1 374 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ 375 "Unsupported architecture %s" % self.arch 376 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] 377 378 self.code_create_struct = LogReader._DefineStruct([ 379 ("name_size", ctypes.c_int32), 380 ("code_address", pointer_type), 381 ("code_size", ctypes.c_int32)]) 382 383 self.code_move_struct = LogReader._DefineStruct([ 384 ("from_address", pointer_type), 385 ("to_address", pointer_type)]) 386 387 self.code_delete_struct = LogReader._DefineStruct([ 388 ("address", pointer_type)]) 389 390 self.snapshot_position_struct = LogReader._DefineStruct([ 391 ("address", pointer_type), 392 ("position", ctypes.c_int32)]) 393 394 def ReadUpToGC(self): 395 while self.log_pos < self.log.size(): 396 tag = self.log[self.log_pos] 397 self.log_pos += 1 398 399 if tag == LogReader._CODE_MOVING_GC_TAG: 400 self.address_to_snapshot_name.clear() 401 return 402 403 if tag == LogReader._CODE_CREATE_TAG: 404 event = self.code_create_struct.from_buffer(self.log, self.log_pos) 405 self.log_pos += ctypes.sizeof(event) 406 start_address = event.code_address 407 end_address = start_address + event.code_size 408 if start_address in self.address_to_snapshot_name: 409 name = self.address_to_snapshot_name[start_address] 410 origin = JS_SNAPSHOT_ORIGIN 411 else: 412 name = self.log[self.log_pos:self.log_pos + event.name_size] 413 origin = JS_ORIGIN 414 self.log_pos += event.name_size 415 origin_offset = self.log_pos 416 self.log_pos += event.code_size 417 code = Code(name, start_address, end_address, origin, origin_offset) 418 conficting_code = self.code_map.Find(start_address) 419 if conficting_code: 420 if not (conficting_code.start_address == code.start_address and 421 conficting_code.end_address == code.end_address): 422 self.code_map.Remove(conficting_code) 423 else: 424 LogReader._HandleCodeConflict(conficting_code, code) 425 # TODO(vitalyr): this warning is too noisy because of our 426 # attempts to reconstruct code log from the snapshot. 427 # print >>sys.stderr, \ 428 # "Warning: Skipping duplicate code log entry %s" % code 429 continue 430 self.code_map.Add(code) 431 continue 432 433 if tag == LogReader._CODE_MOVE_TAG: 434 event = self.code_move_struct.from_buffer(self.log, self.log_pos) 435 self.log_pos += ctypes.sizeof(event) 436 old_start_address = event.from_address 437 new_start_address = event.to_address 438 if old_start_address == new_start_address: 439 # Skip useless code move entries. 440 continue 441 code = self.code_map.Find(old_start_address) 442 if not code: 443 print >>sys.stderr, "Warning: Not found %x" % old_start_address 444 continue 445 assert code.start_address == old_start_address, \ 446 "Inexact move address %x for %s" % (old_start_address, code) 447 self.code_map.Remove(code) 448 size = code.end_address - code.start_address 449 code.start_address = new_start_address 450 code.end_address = new_start_address + size 451 self.code_map.Add(code) 452 continue 453 454 if tag == LogReader._CODE_DELETE_TAG: 455 event = self.code_delete_struct.from_buffer(self.log, self.log_pos) 456 self.log_pos += ctypes.sizeof(event) 457 old_start_address = event.address 458 code = self.code_map.Find(old_start_address) 459 if not code: 460 print >>sys.stderr, "Warning: Not found %x" % old_start_address 461 continue 462 assert code.start_address == old_start_address, \ 463 "Inexact delete address %x for %s" % (old_start_address, code) 464 self.code_map.Remove(code) 465 continue 466 467 if tag == LogReader._SNAPSHOT_POSITION_TAG: 468 event = self.snapshot_position_struct.from_buffer(self.log, 469 self.log_pos) 470 self.log_pos += ctypes.sizeof(event) 471 start_address = event.address 472 snapshot_pos = event.position 473 if snapshot_pos in self.snapshot_pos_to_name: 474 self.address_to_snapshot_name[start_address] = \ 475 self.snapshot_pos_to_name[snapshot_pos] 476 continue 477 478 assert False, "Unknown tag %s" % tag 479 480 def Dispose(self): 481 self.log.close() 482 self.log_file.close() 483 484 @staticmethod 485 def _DefineStruct(fields): 486 class Struct(ctypes.Structure): 487 _fields_ = fields 488 return Struct 489 490 @staticmethod 491 def _HandleCodeConflict(old_code, new_code): 492 assert (old_code.start_address == new_code.start_address and 493 old_code.end_address == new_code.end_address), \ 494 "Conficting code log entries %s and %s" % (old_code, new_code) 495 if old_code.name == new_code.name: 496 return 497 # Code object may be shared by a few functions. Collect the full 498 # set of names. 499 old_code.AddName(new_code.name) 500 501 502class Descriptor(object): 503 """Descriptor of a structure in the binary trace log.""" 504 505 CTYPE_MAP = { 506 "u16": ctypes.c_uint16, 507 "u32": ctypes.c_uint32, 508 "u64": ctypes.c_uint64 509 } 510 511 def __init__(self, fields): 512 class TraceItem(ctypes.Structure): 513 _fields_ = Descriptor.CtypesFields(fields) 514 515 def __str__(self): 516 return ", ".join("%s: %s" % (field, self.__getattribute__(field)) 517 for field, _ in TraceItem._fields_) 518 519 self.ctype = TraceItem 520 521 def Read(self, trace, offset): 522 return self.ctype.from_buffer(trace, offset) 523 524 @staticmethod 525 def CtypesFields(fields): 526 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 527 528 529# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf 530# for the gory details. 531 532 533# Reference: struct perf_file_header in kernel/tools/perf/util/header.h 534TRACE_HEADER_DESC = Descriptor([ 535 ("magic", "u64"), 536 ("size", "u64"), 537 ("attr_size", "u64"), 538 ("attrs_offset", "u64"), 539 ("attrs_size", "u64"), 540 ("data_offset", "u64"), 541 ("data_size", "u64"), 542 ("event_types_offset", "u64"), 543 ("event_types_size", "u64") 544]) 545 546 547# Reference: /usr/include/linux/perf_event.h 548PERF_EVENT_ATTR_DESC = Descriptor([ 549 ("type", "u32"), 550 ("size", "u32"), 551 ("config", "u64"), 552 ("sample_period_or_freq", "u64"), 553 ("sample_type", "u64"), 554 ("read_format", "u64"), 555 ("flags", "u64"), 556 ("wakeup_events_or_watermark", "u32"), 557 ("bp_type", "u32"), 558 ("bp_addr", "u64"), 559 ("bp_len", "u64") 560]) 561 562 563# Reference: /usr/include/linux/perf_event.h 564PERF_EVENT_HEADER_DESC = Descriptor([ 565 ("type", "u32"), 566 ("misc", "u16"), 567 ("size", "u16") 568]) 569 570 571# Reference: kernel/tools/perf/util/event.h 572PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 573 ("pid", "u32"), 574 ("tid", "u32"), 575 ("addr", "u64"), 576 ("len", "u64"), 577 ("pgoff", "u64") 578]) 579 580# Reference: kernel/tools/perf/util/event.h 581PERF_MMAP2_EVENT_BODY_DESC = Descriptor([ 582 ("pid", "u32"), 583 ("tid", "u32"), 584 ("addr", "u64"), 585 ("len", "u64"), 586 ("pgoff", "u64"), 587 ("maj", "u32"), 588 ("min", "u32"), 589 ("ino", "u64"), 590 ("ino_generation", "u64"), 591 ("prot", "u32"), 592 ("flags","u32") 593]) 594 595# perf_event_attr.sample_type bits control the set of 596# perf_sample_event fields. 597PERF_SAMPLE_IP = 1 << 0 598PERF_SAMPLE_TID = 1 << 1 599PERF_SAMPLE_TIME = 1 << 2 600PERF_SAMPLE_ADDR = 1 << 3 601PERF_SAMPLE_READ = 1 << 4 602PERF_SAMPLE_CALLCHAIN = 1 << 5 603PERF_SAMPLE_ID = 1 << 6 604PERF_SAMPLE_CPU = 1 << 7 605PERF_SAMPLE_PERIOD = 1 << 8 606PERF_SAMPLE_STREAM_ID = 1 << 9 607PERF_SAMPLE_RAW = 1 << 10 608 609 610# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. 611PERF_SAMPLE_EVENT_BODY_FIELDS = [ 612 ("ip", "u64", PERF_SAMPLE_IP), 613 ("pid", "u32", PERF_SAMPLE_TID), 614 ("tid", "u32", PERF_SAMPLE_TID), 615 ("time", "u64", PERF_SAMPLE_TIME), 616 ("addr", "u64", PERF_SAMPLE_ADDR), 617 ("id", "u64", PERF_SAMPLE_ID), 618 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 619 ("cpu", "u32", PERF_SAMPLE_CPU), 620 ("res", "u32", PERF_SAMPLE_CPU), 621 ("period", "u64", PERF_SAMPLE_PERIOD), 622 # Don't want to handle read format that comes after the period and 623 # before the callchain and has variable size. 624 ("nr", "u64", PERF_SAMPLE_CALLCHAIN) 625 # Raw data follows the callchain and is ignored. 626] 627 628 629PERF_SAMPLE_EVENT_IP_FORMAT = "u64" 630 631 632PERF_RECORD_MMAP = 1 633PERF_RECORD_MMAP2 = 10 634PERF_RECORD_SAMPLE = 9 635 636 637class TraceReader(object): 638 """Perf (linux-2.6/tools/perf) trace file reader.""" 639 640 _TRACE_HEADER_MAGIC = 4993446653023372624 641 642 def __init__(self, trace_name): 643 self.trace_file = open(trace_name, "r") 644 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) 645 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) 646 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: 647 print >>sys.stderr, "Warning: unsupported trace header magic" 648 self.offset = self.trace_header.data_offset 649 self.limit = self.trace_header.data_offset + self.trace_header.data_size 650 assert self.limit <= self.trace.size(), \ 651 "Trace data limit exceeds trace file size" 652 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) 653 assert self.trace_header.attrs_size != 0, \ 654 "No perf event attributes found in the trace" 655 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, 656 self.trace_header.attrs_offset) 657 self.sample_event_body_desc = self._SampleEventBodyDesc( 658 perf_event_attr.sample_type) 659 self.callchain_supported = \ 660 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 661 if self.callchain_supported: 662 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] 663 self.ip_size = ctypes.sizeof(self.ip_struct) 664 665 def ReadEventHeader(self): 666 if self.offset >= self.limit: 667 return None, 0 668 offset = self.offset 669 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) 670 self.offset += header.size 671 return header, offset 672 673 def ReadMmap(self, header, offset): 674 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, 675 offset + self.header_size) 676 # Read null-terminated filename. 677 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 678 offset + header.size] 679 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 680 return mmap_info 681 682 def ReadMmap2(self, header, offset): 683 mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace, 684 offset + self.header_size) 685 # Read null-terminated filename. 686 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 687 offset + header.size] 688 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 689 return mmap_info 690 691 def ReadSample(self, header, offset): 692 sample = self.sample_event_body_desc.Read(self.trace, 693 offset + self.header_size) 694 if not self.callchain_supported: 695 return sample 696 sample.ips = [] 697 offset += self.header_size + ctypes.sizeof(sample) 698 for _ in xrange(sample.nr): 699 sample.ips.append( 700 self.ip_struct.from_buffer(self.trace, offset).value) 701 offset += self.ip_size 702 return sample 703 704 def Dispose(self): 705 self.trace.close() 706 self.trace_file.close() 707 708 def _SampleEventBodyDesc(self, sample_type): 709 assert (sample_type & PERF_SAMPLE_READ) == 0, \ 710 "Can't hande read format in samples" 711 fields = [(field, format) 712 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS 713 if (bit & sample_type) != 0] 714 return Descriptor(fields) 715 716 717OBJDUMP_SECTION_HEADER_RE = re.compile( 718 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") 719OBJDUMP_SYMBOL_LINE_RE = re.compile( 720 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") 721OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( 722 r"^DYNAMIC SYMBOL TABLE") 723OBJDUMP_SKIP_RE = re.compile( 724 r"^.*ld\.so\.cache$") 725KERNEL_ALLSYMS_FILE = "/proc/kallsyms" 726PERF_KERNEL_ALLSYMS_RE = re.compile( 727 r".*kallsyms.*") 728KERNEL_ALLSYMS_LINE_RE = re.compile( 729 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") 730 731 732class LibraryRepo(object): 733 def __init__(self): 734 self.infos = [] 735 self.names = set() 736 self.ticks = {} 737 738 739 def HasDynamicSymbols(self, filename): 740 if filename.endswith(".ko"): return False 741 process = subprocess.Popen( 742 "%s -h %s" % (OBJDUMP_BIN, filename), 743 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 744 pipe = process.stdout 745 try: 746 for line in pipe: 747 match = OBJDUMP_SECTION_HEADER_RE.match(line) 748 if match and match.group(1) == 'dynsym': return True 749 finally: 750 pipe.close() 751 assert process.wait() == 0, "Failed to objdump -h %s" % filename 752 return False 753 754 755 def Load(self, mmap_info, code_map, options): 756 # Skip kernel mmaps when requested using the fact that their tid 757 # is 0. 758 if mmap_info.tid == 0 and not options.kernel: 759 return True 760 if OBJDUMP_SKIP_RE.match(mmap_info.filename): 761 return True 762 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): 763 return self._LoadKernelSymbols(code_map) 764 self.infos.append(mmap_info) 765 mmap_info.ticks = 0 766 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 767 if not os.path.exists(mmap_info.filename): 768 return True 769 # Request section headers (-h), symbols (-t), and dynamic symbols 770 # (-T) from objdump. 771 # Unfortunately, section headers span two lines, so we have to 772 # keep the just seen section name (from the first line in each 773 # section header) in the after_section variable. 774 if self.HasDynamicSymbols(mmap_info.filename): 775 dynamic_symbols = "-T" 776 else: 777 dynamic_symbols = "" 778 process = subprocess.Popen( 779 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), 780 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 781 pipe = process.stdout 782 after_section = None 783 code_sections = set() 784 reloc_sections = set() 785 dynamic = False 786 try: 787 for line in pipe: 788 if after_section: 789 if line.find("CODE") != -1: 790 code_sections.add(after_section) 791 if line.find("RELOC") != -1: 792 reloc_sections.add(after_section) 793 after_section = None 794 continue 795 796 match = OBJDUMP_SECTION_HEADER_RE.match(line) 797 if match: 798 after_section = match.group(1) 799 continue 800 801 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): 802 dynamic = True 803 continue 804 805 match = OBJDUMP_SYMBOL_LINE_RE.match(line) 806 if match: 807 start_address = int(match.group(1), 16) 808 origin_offset = start_address 809 flags = match.group(2) 810 section = match.group(3) 811 if section in code_sections: 812 if dynamic or section in reloc_sections: 813 start_address += mmap_info.addr 814 size = int(match.group(4), 16) 815 name = match.group(5) 816 origin = mmap_info.filename 817 code_map.Add(Code(name, start_address, start_address + size, 818 origin, origin_offset)) 819 finally: 820 pipe.close() 821 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename 822 823 def Tick(self, pc): 824 for i, mmap_info in enumerate(self.infos): 825 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): 826 mmap_info.ticks += 1 827 self.infos[0], self.infos[i] = mmap_info, self.infos[0] 828 return True 829 return False 830 831 def _UniqueMmapName(self, mmap_info): 832 name = mmap_info.filename 833 index = 1 834 while name in self.names: 835 name = "%s-%d" % (mmap_info.filename, index) 836 index += 1 837 self.names.add(name) 838 return name 839 840 def _LoadKernelSymbols(self, code_map): 841 if not os.path.exists(KERNEL_ALLSYMS_FILE): 842 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE 843 return False 844 kallsyms = open(KERNEL_ALLSYMS_FILE, "r") 845 code = None 846 for line in kallsyms: 847 match = KERNEL_ALLSYMS_LINE_RE.match(line) 848 if match: 849 start_address = int(match.group(1), 16) 850 end_address = start_address 851 name = match.group(2) 852 if code: 853 code.end_address = start_address 854 code_map.Add(code, 16) 855 code = Code(name, start_address, end_address, "kernel", 0) 856 return True 857 858 859def PrintReport(code_map, library_repo, arch, ticks, options): 860 print "Ticks per symbol:" 861 used_code = [code for code in code_map.UsedCode()] 862 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 863 for i, code in enumerate(used_code): 864 code_ticks = code.self_ticks 865 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 866 code.FullName(), code.origin) 867 if options.disasm_all or i < options.disasm_top: 868 code.PrintAnnotated(arch, options) 869 print 870 print "Ticks per library:" 871 mmap_infos = [m for m in library_repo.infos if m.ticks > 0] 872 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 873 for mmap_info in mmap_infos: 874 mmap_ticks = mmap_info.ticks 875 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 876 mmap_info.unique_name) 877 878 879def PrintDot(code_map, options): 880 print "digraph G {" 881 for code in code_map.UsedCode(): 882 if code.self_ticks < 10: 883 continue 884 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name) 885 if code.callee_ticks: 886 for callee, ticks in code.callee_ticks.iteritems(): 887 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks) 888 print "}" 889 890 891if __name__ == "__main__": 892 parser = optparse.OptionParser(USAGE) 893 parser.add_option("--snapshot-log", 894 default="obj/release/snapshot.log", 895 help="V8 snapshot log file name [default: %default]") 896 parser.add_option("--log", 897 default="v8.log", 898 help="V8 log file name [default: %default]") 899 parser.add_option("--snapshot", 900 default=False, 901 action="store_true", 902 help="process V8 snapshot log [default: %default]") 903 parser.add_option("--trace", 904 default="perf.data", 905 help="perf trace file name [default: %default]") 906 parser.add_option("--kernel", 907 default=False, 908 action="store_true", 909 help="process kernel entries [default: %default]") 910 parser.add_option("--disasm-top", 911 default=0, 912 type="int", 913 help=("number of top symbols to disassemble and annotate " 914 "[default: %default]")) 915 parser.add_option("--disasm-all", 916 default=False, 917 action="store_true", 918 help=("disassemble and annotate all used symbols " 919 "[default: %default]")) 920 parser.add_option("--dot", 921 default=False, 922 action="store_true", 923 help="produce dot output (WIP) [default: %default]") 924 parser.add_option("--quiet", "-q", 925 default=False, 926 action="store_true", 927 help="no auxiliary messages [default: %default]") 928 parser.add_option("--gc-fake-mmap", 929 default="/tmp/__v8_gc__", 930 help="gc fake mmap file [default: %default]") 931 parser.add_option("--objdump", 932 default="/usr/bin/objdump", 933 help="objdump tool to use [default: %default]") 934 parser.add_option("--host-root", 935 default="", 936 help="Path to the host root [default: %default]") 937 options, args = parser.parse_args() 938 939 if not options.quiet: 940 if options.snapshot: 941 print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log, 942 options.log, 943 options.log) 944 else: 945 print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log) 946 print "Perf trace file: %s" % options.trace 947 948 V8_GC_FAKE_MMAP = options.gc_fake_mmap 949 HOST_ROOT = options.host_root 950 if os.path.exists(options.objdump): 951 disasm.OBJDUMP_BIN = options.objdump 952 OBJDUMP_BIN = options.objdump 953 else: 954 print "Cannot find %s, falling back to default objdump" % options.objdump 955 956 # Stats. 957 events = 0 958 ticks = 0 959 missed_ticks = 0 960 really_missed_ticks = 0 961 optimized_ticks = 0 962 generated_ticks = 0 963 v8_internal_ticks = 0 964 mmap_time = 0 965 sample_time = 0 966 967 # Process the snapshot log to fill the snapshot name map. 968 snapshot_name_map = {} 969 if options.snapshot: 970 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log) 971 snapshot_name_map = snapshot_log_reader.ReadNameMap() 972 973 # Initialize the log reader. 974 code_map = CodeMap() 975 log_reader = LogReader(log_name=options.log + ".ll", 976 code_map=code_map, 977 snapshot_pos_to_name=snapshot_name_map) 978 if not options.quiet: 979 print "Generated code architecture: %s" % log_reader.arch 980 print 981 sys.stdout.flush() 982 983 # Process the code and trace logs. 984 library_repo = LibraryRepo() 985 log_reader.ReadUpToGC() 986 trace_reader = TraceReader(options.trace) 987 while True: 988 header, offset = trace_reader.ReadEventHeader() 989 if not header: 990 break 991 events += 1 992 if header.type == PERF_RECORD_MMAP: 993 start = time.time() 994 mmap_info = trace_reader.ReadMmap(header, offset) 995 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 996 log_reader.ReadUpToGC() 997 else: 998 library_repo.Load(mmap_info, code_map, options) 999 mmap_time += time.time() - start 1000 elif header.type == PERF_RECORD_MMAP2: 1001 start = time.time() 1002 mmap_info = trace_reader.ReadMmap2(header, offset) 1003 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 1004 log_reader.ReadUpToGC() 1005 else: 1006 library_repo.Load(mmap_info, code_map, options) 1007 mmap_time += time.time() - start 1008 elif header.type == PERF_RECORD_SAMPLE: 1009 ticks += 1 1010 start = time.time() 1011 sample = trace_reader.ReadSample(header, offset) 1012 code = code_map.Find(sample.ip) 1013 if code: 1014 code.Tick(sample.ip) 1015 if code.codetype == Code.OPTIMIZED: 1016 optimized_ticks += 1 1017 elif code.codetype == Code.FULL_CODEGEN: 1018 generated_ticks += 1 1019 elif code.codetype == Code.V8INTERNAL: 1020 v8_internal_ticks += 1 1021 else: 1022 missed_ticks += 1 1023 if not library_repo.Tick(sample.ip) and not code: 1024 really_missed_ticks += 1 1025 if trace_reader.callchain_supported: 1026 for ip in sample.ips: 1027 caller_code = code_map.Find(ip) 1028 if caller_code: 1029 if code: 1030 caller_code.CalleeTick(code) 1031 code = caller_code 1032 sample_time += time.time() - start 1033 1034 if options.dot: 1035 PrintDot(code_map, options) 1036 else: 1037 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 1038 1039 if not options.quiet: 1040 def PrintTicks(number, total, description): 1041 print("%10d %5.1f%% ticks in %s" % 1042 (number, 100.0*number/total, description)) 1043 print 1044 print "Stats:" 1045 print "%10d total trace events" % events 1046 print "%10d total ticks" % ticks 1047 print "%10d ticks not in symbols" % missed_ticks 1048 unaccounted = "unaccounted ticks" 1049 if really_missed_ticks > 0: 1050 unaccounted += " (probably in the kernel, try --kernel)" 1051 PrintTicks(really_missed_ticks, ticks, unaccounted) 1052 PrintTicks(optimized_ticks, ticks, "ticks in optimized code") 1053 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") 1054 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") 1055 print "%10d total symbols" % len([c for c in code_map.AllCode()]) 1056 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) 1057 print "%9.2fs library processing time" % mmap_time 1058 print "%9.2fs tick processing time" % sample_time 1059 1060 log_reader.Dispose() 1061 trace_reader.Dispose() 1062