1#!/usr/bin/env python 2# 3# Copyright 2012 the V8 project authors. All rights reserved. 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: 7# 8# * Redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer. 10# * Redistributions in binary form must reproduce the above 11# copyright notice, this list of conditions and the following 12# disclaimer in the documentation and/or other materials provided 13# with the distribution. 14# * Neither the name of Google Inc. nor the names of its 15# contributors may be used to endorse or promote products derived 16# from this software without specific prior written permission. 17# 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30import bisect 31import collections 32import ctypes 33import disasm 34import mmap 35import optparse 36import os 37import re 38import subprocess 39import sys 40import time 41 42 43USAGE="""usage: %prog [OPTION]... 44 45Analyses V8 and perf logs to produce profiles. 46 47Perf logs can be collected using a command like: 48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof 49 # -R: collect all data 50 # -e cycles: use cpu-cycles event (run "perf list" for details) 51 # -c 10000: write a sample after each 10000 events 52 # -f: force output file overwrite 53 # -i: limit profiling to our process and the kernel 54 # --ll-prof shell flag enables the right V8 logs 55This will produce a binary trace file (perf.data) that %prog can analyse. 56 57IMPORTANT: 58 The kernel has an internal maximum for events per second, it is 100K by 59 default. That's not enough for "-c 10000". Set it to some higher value: 60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate 61 You can also make the warning about kernel address maps go away: 62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict 63 64We have a convenience script that handles all of the above for you: 65 $ tools/run-llprof.sh ./d8 bench.js 66 67Examples: 68 # Print flat profile with annotated disassembly for the 10 top 69 # symbols. Use default log names and include the snapshot log. 70 $ %prog --snapshot --disasm-top=10 71 72 # Print flat profile with annotated disassembly for all used symbols. 73 # Use default log names and include kernel symbols into analysis. 74 $ %prog --disasm-all --kernel 75 76 # Print flat profile. Use custom log names. 77 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot 78""" 79 80 81JS_ORIGIN = "js" 82JS_SNAPSHOT_ORIGIN = "js-snapshot" 83 84class Code(object): 85 """Code object.""" 86 87 _id = 0 88 UNKNOWN = 0 89 V8INTERNAL = 1 90 FULL_CODEGEN = 2 91 OPTIMIZED = 3 92 93 def __init__(self, name, start_address, end_address, origin, origin_offset): 94 self.id = Code._id 95 Code._id += 1 96 self.name = name 97 self.other_names = None 98 self.start_address = start_address 99 self.end_address = end_address 100 self.origin = origin 101 self.origin_offset = origin_offset 102 self.self_ticks = 0 103 self.self_ticks_map = None 104 self.callee_ticks = None 105 if name.startswith("LazyCompile:*"): 106 self.codetype = Code.OPTIMIZED 107 elif name.startswith("LazyCompile:"): 108 self.codetype = Code.FULL_CODEGEN 109 elif name.startswith("v8::internal::"): 110 self.codetype = Code.V8INTERNAL 111 else: 112 self.codetype = Code.UNKNOWN 113 114 def AddName(self, name): 115 assert self.name != name 116 if self.other_names is None: 117 self.other_names = [name] 118 return 119 if not name in self.other_names: 120 self.other_names.append(name) 121 122 def FullName(self): 123 if self.other_names is None: 124 return self.name 125 self.other_names.sort() 126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) 127 128 def IsUsed(self): 129 return self.self_ticks > 0 or self.callee_ticks is not None 130 131 def Tick(self, pc): 132 self.self_ticks += 1 133 if self.self_ticks_map is None: 134 self.self_ticks_map = collections.defaultdict(lambda: 0) 135 offset = pc - self.start_address 136 self.self_ticks_map[offset] += 1 137 138 def CalleeTick(self, callee): 139 if self.callee_ticks is None: 140 self.callee_ticks = collections.defaultdict(lambda: 0) 141 self.callee_ticks[callee] += 1 142 143 def PrintAnnotated(self, arch, options): 144 if self.self_ticks_map is None: 145 ticks_map = [] 146 else: 147 ticks_map = self.self_ticks_map.items() 148 # Convert the ticks map to offsets and counts arrays so that later 149 # we can do binary search in the offsets array. 150 ticks_map.sort(key=lambda t: t[0]) 151 ticks_offsets = [t[0] for t in ticks_map] 152 ticks_counts = [t[1] for t in ticks_map] 153 # Get a list of disassembled lines and their addresses. 154 lines = self._GetDisasmLines(arch, options) 155 if len(lines) == 0: 156 return 157 # Print annotated lines. 158 address = lines[0][0] 159 total_count = 0 160 for i in xrange(len(lines)): 161 start_offset = lines[i][0] - address 162 if i == len(lines) - 1: 163 end_offset = self.end_address - self.start_address 164 else: 165 end_offset = lines[i + 1][0] - address 166 # Ticks (reported pc values) are not always precise, i.e. not 167 # necessarily point at instruction starts. So we have to search 168 # for ticks that touch the current instruction line. 169 j = bisect.bisect_left(ticks_offsets, end_offset) 170 count = 0 171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): 172 if offset < start_offset: 173 break 174 count += cnt 175 total_count += count 176 count = 100.0 * count / self.self_ticks 177 if count >= 0.01: 178 print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1]) 179 else: 180 print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1]) 181 print 182 assert total_count == self.self_ticks, \ 183 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) 184 185 def __str__(self): 186 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( 187 self.name, 188 self.start_address, 189 self.end_address, 190 self.end_address - self.start_address, 191 self.origin) 192 193 def _GetDisasmLines(self, arch, options): 194 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN: 195 inplace = False 196 filename = options.log + ".ll" 197 else: 198 inplace = True 199 filename = self.origin 200 return disasm.GetDisasmLines(filename, 201 self.origin_offset, 202 self.end_address - self.start_address, 203 arch, 204 inplace) 205 206 207class CodePage(object): 208 """Group of adjacent code objects.""" 209 210 SHIFT = 20 # 1M pages 211 SIZE = (1 << SHIFT) 212 MASK = ~(SIZE - 1) 213 214 @staticmethod 215 def PageAddress(address): 216 return address & CodePage.MASK 217 218 @staticmethod 219 def PageId(address): 220 return address >> CodePage.SHIFT 221 222 @staticmethod 223 def PageAddressFromId(id): 224 return id << CodePage.SHIFT 225 226 def __init__(self, address): 227 self.address = address 228 self.code_objects = [] 229 230 def Add(self, code): 231 self.code_objects.append(code) 232 233 def Remove(self, code): 234 self.code_objects.remove(code) 235 236 def Find(self, pc): 237 code_objects = self.code_objects 238 for i, code in enumerate(code_objects): 239 if code.start_address <= pc < code.end_address: 240 code_objects[0], code_objects[i] = code, code_objects[0] 241 return code 242 return None 243 244 def __iter__(self): 245 return self.code_objects.__iter__() 246 247 248class CodeMap(object): 249 """Code object map.""" 250 251 def __init__(self): 252 self.pages = {} 253 self.min_address = 1 << 64 254 self.max_address = -1 255 256 def Add(self, code, max_pages=-1): 257 page_id = CodePage.PageId(code.start_address) 258 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 259 pages = 0 260 while page_id < limit_id: 261 if max_pages >= 0 and pages > max_pages: 262 print >>sys.stderr, \ 263 "Warning: page limit (%d) reached for %s [%s]" % ( 264 max_pages, code.name, code.origin) 265 break 266 if page_id in self.pages: 267 page = self.pages[page_id] 268 else: 269 page = CodePage(CodePage.PageAddressFromId(page_id)) 270 self.pages[page_id] = page 271 page.Add(code) 272 page_id += 1 273 pages += 1 274 self.min_address = min(self.min_address, code.start_address) 275 self.max_address = max(self.max_address, code.end_address) 276 277 def Remove(self, code): 278 page_id = CodePage.PageId(code.start_address) 279 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 280 removed = False 281 while page_id < limit_id: 282 if page_id not in self.pages: 283 page_id += 1 284 continue 285 page = self.pages[page_id] 286 page.Remove(code) 287 removed = True 288 page_id += 1 289 return removed 290 291 def AllCode(self): 292 for page in self.pages.itervalues(): 293 for code in page: 294 if CodePage.PageAddress(code.start_address) == page.address: 295 yield code 296 297 def UsedCode(self): 298 for code in self.AllCode(): 299 if code.IsUsed(): 300 yield code 301 302 def Print(self): 303 for code in self.AllCode(): 304 print code 305 306 def Find(self, pc): 307 if pc < self.min_address or pc >= self.max_address: 308 return None 309 page_id = CodePage.PageId(pc) 310 if page_id not in self.pages: 311 return None 312 return self.pages[page_id].Find(pc) 313 314 315class CodeInfo(object): 316 """Generic info about generated code objects.""" 317 318 def __init__(self, arch, header_size): 319 self.arch = arch 320 self.header_size = header_size 321 322 323class SnapshotLogReader(object): 324 """V8 snapshot log reader.""" 325 326 _SNAPSHOT_CODE_NAME_RE = re.compile( 327 r"snapshot-code-name,(\d+),\"(.*)\"") 328 329 def __init__(self, log_name): 330 self.log_name = log_name 331 332 def ReadNameMap(self): 333 log = open(self.log_name, "r") 334 try: 335 snapshot_pos_to_name = {} 336 for line in log: 337 match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line) 338 if match: 339 pos = int(match.group(1)) 340 name = match.group(2) 341 snapshot_pos_to_name[pos] = name 342 finally: 343 log.close() 344 return snapshot_pos_to_name 345 346 347class LogReader(object): 348 """V8 low-level (binary) log reader.""" 349 350 _ARCH_TO_POINTER_TYPE_MAP = { 351 "ia32": ctypes.c_uint32, 352 "arm": ctypes.c_uint32, 353 "mips": ctypes.c_uint32, 354 "x64": ctypes.c_uint64, 355 "arm64": ctypes.c_uint64 356 } 357 358 _CODE_CREATE_TAG = "C" 359 _CODE_MOVE_TAG = "M" 360 _CODE_DELETE_TAG = "D" 361 _SNAPSHOT_POSITION_TAG = "P" 362 _CODE_MOVING_GC_TAG = "G" 363 364 def __init__(self, log_name, code_map, snapshot_pos_to_name): 365 self.log_file = open(log_name, "r") 366 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) 367 self.log_pos = 0 368 self.code_map = code_map 369 self.snapshot_pos_to_name = snapshot_pos_to_name 370 self.address_to_snapshot_name = {} 371 372 self.arch = self.log[:self.log.find("\0")] 373 self.log_pos += len(self.arch) + 1 374 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ 375 "Unsupported architecture %s" % self.arch 376 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] 377 378 self.code_create_struct = LogReader._DefineStruct([ 379 ("name_size", ctypes.c_int32), 380 ("code_address", pointer_type), 381 ("code_size", ctypes.c_int32)]) 382 383 self.code_move_struct = LogReader._DefineStruct([ 384 ("from_address", pointer_type), 385 ("to_address", pointer_type)]) 386 387 self.code_delete_struct = LogReader._DefineStruct([ 388 ("address", pointer_type)]) 389 390 self.snapshot_position_struct = LogReader._DefineStruct([ 391 ("address", pointer_type), 392 ("position", ctypes.c_int32)]) 393 394 def ReadUpToGC(self): 395 while self.log_pos < self.log.size(): 396 tag = self.log[self.log_pos] 397 self.log_pos += 1 398 399 if tag == LogReader._CODE_MOVING_GC_TAG: 400 self.address_to_snapshot_name.clear() 401 return 402 403 if tag == LogReader._CODE_CREATE_TAG: 404 event = self.code_create_struct.from_buffer(self.log, self.log_pos) 405 self.log_pos += ctypes.sizeof(event) 406 start_address = event.code_address 407 end_address = start_address + event.code_size 408 if start_address in self.address_to_snapshot_name: 409 name = self.address_to_snapshot_name[start_address] 410 origin = JS_SNAPSHOT_ORIGIN 411 else: 412 name = self.log[self.log_pos:self.log_pos + event.name_size] 413 origin = JS_ORIGIN 414 self.log_pos += event.name_size 415 origin_offset = self.log_pos 416 self.log_pos += event.code_size 417 code = Code(name, start_address, end_address, origin, origin_offset) 418 conficting_code = self.code_map.Find(start_address) 419 if conficting_code: 420 if not (conficting_code.start_address == code.start_address and 421 conficting_code.end_address == code.end_address): 422 self.code_map.Remove(conficting_code) 423 else: 424 LogReader._HandleCodeConflict(conficting_code, code) 425 # TODO(vitalyr): this warning is too noisy because of our 426 # attempts to reconstruct code log from the snapshot. 427 # print >>sys.stderr, \ 428 # "Warning: Skipping duplicate code log entry %s" % code 429 continue 430 self.code_map.Add(code) 431 continue 432 433 if tag == LogReader._CODE_MOVE_TAG: 434 event = self.code_move_struct.from_buffer(self.log, self.log_pos) 435 self.log_pos += ctypes.sizeof(event) 436 old_start_address = event.from_address 437 new_start_address = event.to_address 438 if old_start_address == new_start_address: 439 # Skip useless code move entries. 440 continue 441 code = self.code_map.Find(old_start_address) 442 if not code: 443 print >>sys.stderr, "Warning: Not found %x" % old_start_address 444 continue 445 assert code.start_address == old_start_address, \ 446 "Inexact move address %x for %s" % (old_start_address, code) 447 self.code_map.Remove(code) 448 size = code.end_address - code.start_address 449 code.start_address = new_start_address 450 code.end_address = new_start_address + size 451 self.code_map.Add(code) 452 continue 453 454 if tag == LogReader._CODE_DELETE_TAG: 455 event = self.code_delete_struct.from_buffer(self.log, self.log_pos) 456 self.log_pos += ctypes.sizeof(event) 457 old_start_address = event.address 458 code = self.code_map.Find(old_start_address) 459 if not code: 460 print >>sys.stderr, "Warning: Not found %x" % old_start_address 461 continue 462 assert code.start_address == old_start_address, \ 463 "Inexact delete address %x for %s" % (old_start_address, code) 464 self.code_map.Remove(code) 465 continue 466 467 if tag == LogReader._SNAPSHOT_POSITION_TAG: 468 event = self.snapshot_position_struct.from_buffer(self.log, 469 self.log_pos) 470 self.log_pos += ctypes.sizeof(event) 471 start_address = event.address 472 snapshot_pos = event.position 473 if snapshot_pos in self.snapshot_pos_to_name: 474 self.address_to_snapshot_name[start_address] = \ 475 self.snapshot_pos_to_name[snapshot_pos] 476 continue 477 478 assert False, "Unknown tag %s" % tag 479 480 def Dispose(self): 481 self.log.close() 482 self.log_file.close() 483 484 @staticmethod 485 def _DefineStruct(fields): 486 class Struct(ctypes.Structure): 487 _fields_ = fields 488 return Struct 489 490 @staticmethod 491 def _HandleCodeConflict(old_code, new_code): 492 assert (old_code.start_address == new_code.start_address and 493 old_code.end_address == new_code.end_address), \ 494 "Conficting code log entries %s and %s" % (old_code, new_code) 495 if old_code.name == new_code.name: 496 return 497 # Code object may be shared by a few functions. Collect the full 498 # set of names. 499 old_code.AddName(new_code.name) 500 501 502class Descriptor(object): 503 """Descriptor of a structure in the binary trace log.""" 504 505 CTYPE_MAP = { 506 "u16": ctypes.c_uint16, 507 "u32": ctypes.c_uint32, 508 "u64": ctypes.c_uint64 509 } 510 511 def __init__(self, fields): 512 class TraceItem(ctypes.Structure): 513 _fields_ = Descriptor.CtypesFields(fields) 514 515 def __str__(self): 516 return ", ".join("%s: %s" % (field, self.__getattribute__(field)) 517 for field, _ in TraceItem._fields_) 518 519 self.ctype = TraceItem 520 521 def Read(self, trace, offset): 522 return self.ctype.from_buffer(trace, offset) 523 524 @staticmethod 525 def CtypesFields(fields): 526 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 527 528 529# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf 530# for the gory details. 531 532 533# Reference: struct perf_file_header in kernel/tools/perf/util/header.h 534TRACE_HEADER_DESC = Descriptor([ 535 ("magic", "u64"), 536 ("size", "u64"), 537 ("attr_size", "u64"), 538 ("attrs_offset", "u64"), 539 ("attrs_size", "u64"), 540 ("data_offset", "u64"), 541 ("data_size", "u64"), 542 ("event_types_offset", "u64"), 543 ("event_types_size", "u64") 544]) 545 546 547# Reference: /usr/include/linux/perf_event.h 548PERF_EVENT_ATTR_DESC = Descriptor([ 549 ("type", "u32"), 550 ("size", "u32"), 551 ("config", "u64"), 552 ("sample_period_or_freq", "u64"), 553 ("sample_type", "u64"), 554 ("read_format", "u64"), 555 ("flags", "u64"), 556 ("wakeup_events_or_watermark", "u32"), 557 ("bp_type", "u32"), 558 ("bp_addr", "u64"), 559 ("bp_len", "u64") 560]) 561 562 563# Reference: /usr/include/linux/perf_event.h 564PERF_EVENT_HEADER_DESC = Descriptor([ 565 ("type", "u32"), 566 ("misc", "u16"), 567 ("size", "u16") 568]) 569 570 571# Reference: kernel/events/core.c 572PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 573 ("pid", "u32"), 574 ("tid", "u32"), 575 ("addr", "u64"), 576 ("len", "u64"), 577 ("pgoff", "u64") 578]) 579 580 581# perf_event_attr.sample_type bits control the set of 582# perf_sample_event fields. 583PERF_SAMPLE_IP = 1 << 0 584PERF_SAMPLE_TID = 1 << 1 585PERF_SAMPLE_TIME = 1 << 2 586PERF_SAMPLE_ADDR = 1 << 3 587PERF_SAMPLE_READ = 1 << 4 588PERF_SAMPLE_CALLCHAIN = 1 << 5 589PERF_SAMPLE_ID = 1 << 6 590PERF_SAMPLE_CPU = 1 << 7 591PERF_SAMPLE_PERIOD = 1 << 8 592PERF_SAMPLE_STREAM_ID = 1 << 9 593PERF_SAMPLE_RAW = 1 << 10 594 595 596# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. 597PERF_SAMPLE_EVENT_BODY_FIELDS = [ 598 ("ip", "u64", PERF_SAMPLE_IP), 599 ("pid", "u32", PERF_SAMPLE_TID), 600 ("tid", "u32", PERF_SAMPLE_TID), 601 ("time", "u64", PERF_SAMPLE_TIME), 602 ("addr", "u64", PERF_SAMPLE_ADDR), 603 ("id", "u64", PERF_SAMPLE_ID), 604 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 605 ("cpu", "u32", PERF_SAMPLE_CPU), 606 ("res", "u32", PERF_SAMPLE_CPU), 607 ("period", "u64", PERF_SAMPLE_PERIOD), 608 # Don't want to handle read format that comes after the period and 609 # before the callchain and has variable size. 610 ("nr", "u64", PERF_SAMPLE_CALLCHAIN) 611 # Raw data follows the callchain and is ignored. 612] 613 614 615PERF_SAMPLE_EVENT_IP_FORMAT = "u64" 616 617 618PERF_RECORD_MMAP = 1 619PERF_RECORD_SAMPLE = 9 620 621 622class TraceReader(object): 623 """Perf (linux-2.6/tools/perf) trace file reader.""" 624 625 _TRACE_HEADER_MAGIC = 4993446653023372624 626 627 def __init__(self, trace_name): 628 self.trace_file = open(trace_name, "r") 629 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) 630 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) 631 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: 632 print >>sys.stderr, "Warning: unsupported trace header magic" 633 self.offset = self.trace_header.data_offset 634 self.limit = self.trace_header.data_offset + self.trace_header.data_size 635 assert self.limit <= self.trace.size(), \ 636 "Trace data limit exceeds trace file size" 637 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) 638 assert self.trace_header.attrs_size != 0, \ 639 "No perf event attributes found in the trace" 640 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, 641 self.trace_header.attrs_offset) 642 self.sample_event_body_desc = self._SampleEventBodyDesc( 643 perf_event_attr.sample_type) 644 self.callchain_supported = \ 645 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 646 if self.callchain_supported: 647 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] 648 self.ip_size = ctypes.sizeof(self.ip_struct) 649 650 def ReadEventHeader(self): 651 if self.offset >= self.limit: 652 return None, 0 653 offset = self.offset 654 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) 655 self.offset += header.size 656 return header, offset 657 658 def ReadMmap(self, header, offset): 659 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, 660 offset + self.header_size) 661 # Read null-terminated filename. 662 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 663 offset + header.size] 664 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 665 return mmap_info 666 667 def ReadSample(self, header, offset): 668 sample = self.sample_event_body_desc.Read(self.trace, 669 offset + self.header_size) 670 if not self.callchain_supported: 671 return sample 672 sample.ips = [] 673 offset += self.header_size + ctypes.sizeof(sample) 674 for _ in xrange(sample.nr): 675 sample.ips.append( 676 self.ip_struct.from_buffer(self.trace, offset).value) 677 offset += self.ip_size 678 return sample 679 680 def Dispose(self): 681 self.trace.close() 682 self.trace_file.close() 683 684 def _SampleEventBodyDesc(self, sample_type): 685 assert (sample_type & PERF_SAMPLE_READ) == 0, \ 686 "Can't hande read format in samples" 687 fields = [(field, format) 688 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS 689 if (bit & sample_type) != 0] 690 return Descriptor(fields) 691 692 693OBJDUMP_SECTION_HEADER_RE = re.compile( 694 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") 695OBJDUMP_SYMBOL_LINE_RE = re.compile( 696 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") 697OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( 698 r"^DYNAMIC SYMBOL TABLE") 699OBJDUMP_SKIP_RE = re.compile( 700 r"^.*ld\.so\.cache$") 701KERNEL_ALLSYMS_FILE = "/proc/kallsyms" 702PERF_KERNEL_ALLSYMS_RE = re.compile( 703 r".*kallsyms.*") 704KERNEL_ALLSYMS_LINE_RE = re.compile( 705 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") 706 707 708class LibraryRepo(object): 709 def __init__(self): 710 self.infos = [] 711 self.names = set() 712 self.ticks = {} 713 714 def Load(self, mmap_info, code_map, options): 715 # Skip kernel mmaps when requested using the fact that their tid 716 # is 0. 717 if mmap_info.tid == 0 and not options.kernel: 718 return True 719 if OBJDUMP_SKIP_RE.match(mmap_info.filename): 720 return True 721 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): 722 return self._LoadKernelSymbols(code_map) 723 self.infos.append(mmap_info) 724 mmap_info.ticks = 0 725 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 726 if not os.path.exists(mmap_info.filename): 727 return True 728 # Request section headers (-h), symbols (-t), and dynamic symbols 729 # (-T) from objdump. 730 # Unfortunately, section headers span two lines, so we have to 731 # keep the just seen section name (from the first line in each 732 # section header) in the after_section variable. 733 if mmap_info.filename.endswith(".ko"): 734 dynamic_symbols = "" 735 else: 736 dynamic_symbols = "-T" 737 process = subprocess.Popen( 738 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), 739 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 740 pipe = process.stdout 741 after_section = None 742 code_sections = set() 743 reloc_sections = set() 744 dynamic = False 745 try: 746 for line in pipe: 747 if after_section: 748 if line.find("CODE") != -1: 749 code_sections.add(after_section) 750 if line.find("RELOC") != -1: 751 reloc_sections.add(after_section) 752 after_section = None 753 continue 754 755 match = OBJDUMP_SECTION_HEADER_RE.match(line) 756 if match: 757 after_section = match.group(1) 758 continue 759 760 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): 761 dynamic = True 762 continue 763 764 match = OBJDUMP_SYMBOL_LINE_RE.match(line) 765 if match: 766 start_address = int(match.group(1), 16) 767 origin_offset = start_address 768 flags = match.group(2) 769 section = match.group(3) 770 if section in code_sections: 771 if dynamic or section in reloc_sections: 772 start_address += mmap_info.addr 773 size = int(match.group(4), 16) 774 name = match.group(5) 775 origin = mmap_info.filename 776 code_map.Add(Code(name, start_address, start_address + size, 777 origin, origin_offset)) 778 finally: 779 pipe.close() 780 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename 781 782 def Tick(self, pc): 783 for i, mmap_info in enumerate(self.infos): 784 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): 785 mmap_info.ticks += 1 786 self.infos[0], self.infos[i] = mmap_info, self.infos[0] 787 return True 788 return False 789 790 def _UniqueMmapName(self, mmap_info): 791 name = mmap_info.filename 792 index = 1 793 while name in self.names: 794 name = "%s-%d" % (mmap_info.filename, index) 795 index += 1 796 self.names.add(name) 797 return name 798 799 def _LoadKernelSymbols(self, code_map): 800 if not os.path.exists(KERNEL_ALLSYMS_FILE): 801 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE 802 return False 803 kallsyms = open(KERNEL_ALLSYMS_FILE, "r") 804 code = None 805 for line in kallsyms: 806 match = KERNEL_ALLSYMS_LINE_RE.match(line) 807 if match: 808 start_address = int(match.group(1), 16) 809 end_address = start_address 810 name = match.group(2) 811 if code: 812 code.end_address = start_address 813 code_map.Add(code, 16) 814 code = Code(name, start_address, end_address, "kernel", 0) 815 return True 816 817 818def PrintReport(code_map, library_repo, arch, ticks, options): 819 print "Ticks per symbol:" 820 used_code = [code for code in code_map.UsedCode()] 821 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 822 for i, code in enumerate(used_code): 823 code_ticks = code.self_ticks 824 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 825 code.FullName(), code.origin) 826 if options.disasm_all or i < options.disasm_top: 827 code.PrintAnnotated(arch, options) 828 print 829 print "Ticks per library:" 830 mmap_infos = [m for m in library_repo.infos if m.ticks > 0] 831 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 832 for mmap_info in mmap_infos: 833 mmap_ticks = mmap_info.ticks 834 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 835 mmap_info.unique_name) 836 837 838def PrintDot(code_map, options): 839 print "digraph G {" 840 for code in code_map.UsedCode(): 841 if code.self_ticks < 10: 842 continue 843 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name) 844 if code.callee_ticks: 845 for callee, ticks in code.callee_ticks.iteritems(): 846 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks) 847 print "}" 848 849 850if __name__ == "__main__": 851 parser = optparse.OptionParser(USAGE) 852 parser.add_option("--snapshot-log", 853 default="obj/release/snapshot.log", 854 help="V8 snapshot log file name [default: %default]") 855 parser.add_option("--log", 856 default="v8.log", 857 help="V8 log file name [default: %default]") 858 parser.add_option("--snapshot", 859 default=False, 860 action="store_true", 861 help="process V8 snapshot log [default: %default]") 862 parser.add_option("--trace", 863 default="perf.data", 864 help="perf trace file name [default: %default]") 865 parser.add_option("--kernel", 866 default=False, 867 action="store_true", 868 help="process kernel entries [default: %default]") 869 parser.add_option("--disasm-top", 870 default=0, 871 type="int", 872 help=("number of top symbols to disassemble and annotate " 873 "[default: %default]")) 874 parser.add_option("--disasm-all", 875 default=False, 876 action="store_true", 877 help=("disassemble and annotate all used symbols " 878 "[default: %default]")) 879 parser.add_option("--dot", 880 default=False, 881 action="store_true", 882 help="produce dot output (WIP) [default: %default]") 883 parser.add_option("--quiet", "-q", 884 default=False, 885 action="store_true", 886 help="no auxiliary messages [default: %default]") 887 parser.add_option("--gc-fake-mmap", 888 default="/tmp/__v8_gc__", 889 help="gc fake mmap file [default: %default]") 890 parser.add_option("--objdump", 891 default="/usr/bin/objdump", 892 help="objdump tool to use [default: %default]") 893 parser.add_option("--host-root", 894 default="", 895 help="Path to the host root [default: %default]") 896 options, args = parser.parse_args() 897 898 if not options.quiet: 899 if options.snapshot: 900 print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log, 901 options.log, 902 options.log) 903 else: 904 print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log) 905 print "Perf trace file: %s" % options.trace 906 907 V8_GC_FAKE_MMAP = options.gc_fake_mmap 908 HOST_ROOT = options.host_root 909 if os.path.exists(options.objdump): 910 disasm.OBJDUMP_BIN = options.objdump 911 OBJDUMP_BIN = options.objdump 912 else: 913 print "Cannot find %s, falling back to default objdump" % options.objdump 914 915 # Stats. 916 events = 0 917 ticks = 0 918 missed_ticks = 0 919 really_missed_ticks = 0 920 optimized_ticks = 0 921 generated_ticks = 0 922 v8_internal_ticks = 0 923 mmap_time = 0 924 sample_time = 0 925 926 # Process the snapshot log to fill the snapshot name map. 927 snapshot_name_map = {} 928 if options.snapshot: 929 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log) 930 snapshot_name_map = snapshot_log_reader.ReadNameMap() 931 932 # Initialize the log reader. 933 code_map = CodeMap() 934 log_reader = LogReader(log_name=options.log + ".ll", 935 code_map=code_map, 936 snapshot_pos_to_name=snapshot_name_map) 937 if not options.quiet: 938 print "Generated code architecture: %s" % log_reader.arch 939 print 940 sys.stdout.flush() 941 942 # Process the code and trace logs. 943 library_repo = LibraryRepo() 944 log_reader.ReadUpToGC() 945 trace_reader = TraceReader(options.trace) 946 while True: 947 header, offset = trace_reader.ReadEventHeader() 948 if not header: 949 break 950 events += 1 951 if header.type == PERF_RECORD_MMAP: 952 start = time.time() 953 mmap_info = trace_reader.ReadMmap(header, offset) 954 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 955 log_reader.ReadUpToGC() 956 else: 957 library_repo.Load(mmap_info, code_map, options) 958 mmap_time += time.time() - start 959 elif header.type == PERF_RECORD_SAMPLE: 960 ticks += 1 961 start = time.time() 962 sample = trace_reader.ReadSample(header, offset) 963 code = code_map.Find(sample.ip) 964 if code: 965 code.Tick(sample.ip) 966 if code.codetype == Code.OPTIMIZED: 967 optimized_ticks += 1 968 elif code.codetype == Code.FULL_CODEGEN: 969 generated_ticks += 1 970 elif code.codetype == Code.V8INTERNAL: 971 v8_internal_ticks += 1 972 else: 973 missed_ticks += 1 974 if not library_repo.Tick(sample.ip) and not code: 975 really_missed_ticks += 1 976 if trace_reader.callchain_supported: 977 for ip in sample.ips: 978 caller_code = code_map.Find(ip) 979 if caller_code: 980 if code: 981 caller_code.CalleeTick(code) 982 code = caller_code 983 sample_time += time.time() - start 984 985 if options.dot: 986 PrintDot(code_map, options) 987 else: 988 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 989 990 if not options.quiet: 991 def PrintTicks(number, total, description): 992 print("%10d %5.1f%% ticks in %s" % 993 (number, 100.0*number/total, description)) 994 print 995 print "Stats:" 996 print "%10d total trace events" % events 997 print "%10d total ticks" % ticks 998 print "%10d ticks not in symbols" % missed_ticks 999 unaccounted = "unaccounted ticks" 1000 if really_missed_ticks > 0: 1001 unaccounted += " (probably in the kernel, try --kernel)" 1002 PrintTicks(really_missed_ticks, ticks, unaccounted) 1003 PrintTicks(optimized_ticks, ticks, "ticks in optimized code") 1004 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") 1005 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") 1006 print "%10d total symbols" % len([c for c in code_map.AllCode()]) 1007 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) 1008 print "%9.2fs library processing time" % mmap_time 1009 print "%9.2fs tick processing time" % sample_time 1010 1011 log_reader.Dispose() 1012 trace_reader.Dispose() 1013