1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import bisect
31import collections
32import ctypes
33import disasm
34import mmap
35import optparse
36import os
37import re
38import subprocess
39import sys
40import time
41
42
43USAGE="""usage: %prog [OPTION]...
44
45Analyses V8 and perf logs to produce profiles.
46
47Perf logs can be collected using a command like:
48  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49  # -R: collect all data
50  # -e cycles: use cpu-cycles event (run "perf list" for details)
51  # -c 10000: write a sample after each 10000 events
52  # -f: force output file overwrite
53  # -i: limit profiling to our process and the kernel
54  # --ll-prof shell flag enables the right V8 logs
55This will produce a binary trace file (perf.data) that %prog can analyse.
56
57IMPORTANT:
58  The kernel has an internal maximum for events per second, it is 100K by
59  default. That's not enough for "-c 10000". Set it to some higher value:
60  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61  You can also make the warning about kernel address maps go away:
62  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64We have a convenience script that handles all of the above for you:
65  $ tools/run-llprof.sh ./d8 bench.js
66
67Examples:
68  # Print flat profile with annotated disassembly for the 10 top
69  # symbols. Use default log names.
70  $ %prog --disasm-top=10
71
72  # Print flat profile with annotated disassembly for all used symbols.
73  # Use default log names and include kernel symbols into analysis.
74  $ %prog --disasm-all --kernel
75
76  # Print flat profile. Use custom log names.
77  $ %prog --log=foo.log --trace=foo.data
78"""
79
80
81JS_ORIGIN = "js"
82
83
84class Code(object):
85  """Code object."""
86
87  _id = 0
88  UNKNOWN = 0
89  V8INTERNAL = 1
90  FULL_CODEGEN = 2
91  OPTIMIZED = 3
92
93  def __init__(self, name, start_address, end_address, origin, origin_offset):
94    self.id = Code._id
95    Code._id += 1
96    self.name = name
97    self.other_names = None
98    self.start_address = start_address
99    self.end_address = end_address
100    self.origin = origin
101    self.origin_offset = origin_offset
102    self.self_ticks = 0
103    self.self_ticks_map = None
104    self.callee_ticks = None
105    if name.startswith("LazyCompile:*"):
106      self.codetype = Code.OPTIMIZED
107    elif name.startswith("LazyCompile:"):
108      self.codetype = Code.FULL_CODEGEN
109    elif name.startswith("v8::internal::"):
110      self.codetype = Code.V8INTERNAL
111    else:
112      self.codetype = Code.UNKNOWN
113
114  def AddName(self, name):
115    assert self.name != name
116    if self.other_names is None:
117      self.other_names = [name]
118      return
119    if not name in self.other_names:
120      self.other_names.append(name)
121
122  def FullName(self):
123    if self.other_names is None:
124      return self.name
125    self.other_names.sort()
126    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
127
128  def IsUsed(self):
129    return self.self_ticks > 0 or self.callee_ticks is not None
130
131  def Tick(self, pc):
132    self.self_ticks += 1
133    if self.self_ticks_map is None:
134      self.self_ticks_map = collections.defaultdict(lambda: 0)
135    offset = pc - self.start_address
136    self.self_ticks_map[offset] += 1
137
138  def CalleeTick(self, callee):
139    if self.callee_ticks is None:
140      self.callee_ticks = collections.defaultdict(lambda: 0)
141    self.callee_ticks[callee] += 1
142
143  def PrintAnnotated(self, arch, options):
144    if self.self_ticks_map is None:
145      ticks_map = []
146    else:
147      ticks_map = self.self_ticks_map.items()
148    # Convert the ticks map to offsets and counts arrays so that later
149    # we can do binary search in the offsets array.
150    ticks_map.sort(key=lambda t: t[0])
151    ticks_offsets = [t[0] for t in ticks_map]
152    ticks_counts = [t[1] for t in ticks_map]
153    # Get a list of disassembled lines and their addresses.
154    lines = self._GetDisasmLines(arch, options)
155    if len(lines) == 0:
156      return
157    # Print annotated lines.
158    address = lines[0][0]
159    total_count = 0
160    for i in xrange(len(lines)):
161      start_offset = lines[i][0] - address
162      if i == len(lines) - 1:
163        end_offset = self.end_address - self.start_address
164      else:
165        end_offset = lines[i + 1][0] - address
166      # Ticks (reported pc values) are not always precise, i.e. not
167      # necessarily point at instruction starts. So we have to search
168      # for ticks that touch the current instruction line.
169      j = bisect.bisect_left(ticks_offsets, end_offset)
170      count = 0
171      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172        if offset < start_offset:
173          break
174        count += cnt
175      total_count += count
176      percent = 100.0 * count / self.self_ticks
177      offset = lines[i][0]
178      if percent >= 0.01:
179        # 5 spaces for tick count
180        # 1 space following
181        # 1 for '|'
182        # 1 space following
183        # 6 for the percentage number, incl. the '.'
184        # 1 for the '%' sign
185        # => 15
186        print "%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1])
187      else:
188        print "%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1])
189    print
190    assert total_count == self.self_ticks, \
191        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
192
193  def __str__(self):
194    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
195      self.name,
196      self.start_address,
197      self.end_address,
198      self.end_address - self.start_address,
199      self.origin)
200
201  def _GetDisasmLines(self, arch, options):
202    if self.origin == JS_ORIGIN:
203      inplace = False
204      filename = options.log + ".ll"
205    else:
206      inplace = True
207      filename = self.origin
208    return disasm.GetDisasmLines(filename,
209                                 self.origin_offset,
210                                 self.end_address - self.start_address,
211                                 arch,
212                                 inplace)
213
214
215class CodePage(object):
216  """Group of adjacent code objects."""
217
218  SHIFT = 20  # 1M pages
219  SIZE = (1 << SHIFT)
220  MASK = ~(SIZE - 1)
221
222  @staticmethod
223  def PageAddress(address):
224    return address & CodePage.MASK
225
226  @staticmethod
227  def PageId(address):
228    return address >> CodePage.SHIFT
229
230  @staticmethod
231  def PageAddressFromId(id):
232    return id << CodePage.SHIFT
233
234  def __init__(self, address):
235    self.address = address
236    self.code_objects = []
237
238  def Add(self, code):
239    self.code_objects.append(code)
240
241  def Remove(self, code):
242    self.code_objects.remove(code)
243
244  def Find(self, pc):
245    code_objects = self.code_objects
246    for i, code in enumerate(code_objects):
247      if code.start_address <= pc < code.end_address:
248        code_objects[0], code_objects[i] = code, code_objects[0]
249        return code
250    return None
251
252  def __iter__(self):
253    return self.code_objects.__iter__()
254
255
256class CodeMap(object):
257  """Code object map."""
258
259  def __init__(self):
260    self.pages = {}
261    self.min_address = 1 << 64
262    self.max_address = -1
263
264  def Add(self, code, max_pages=-1):
265    page_id = CodePage.PageId(code.start_address)
266    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
267    pages = 0
268    while page_id < limit_id:
269      if max_pages >= 0 and pages > max_pages:
270        print >>sys.stderr, \
271            "Warning: page limit (%d) reached for %s [%s]" % (
272            max_pages, code.name, code.origin)
273        break
274      if page_id in self.pages:
275        page = self.pages[page_id]
276      else:
277        page = CodePage(CodePage.PageAddressFromId(page_id))
278        self.pages[page_id] = page
279      page.Add(code)
280      page_id += 1
281      pages += 1
282    self.min_address = min(self.min_address, code.start_address)
283    self.max_address = max(self.max_address, code.end_address)
284
285  def Remove(self, code):
286    page_id = CodePage.PageId(code.start_address)
287    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
288    removed = False
289    while page_id < limit_id:
290      if page_id not in self.pages:
291        page_id += 1
292        continue
293      page = self.pages[page_id]
294      page.Remove(code)
295      removed = True
296      page_id += 1
297    return removed
298
299  def AllCode(self):
300    for page in self.pages.itervalues():
301      for code in page:
302        if CodePage.PageAddress(code.start_address) == page.address:
303          yield code
304
305  def UsedCode(self):
306    for code in self.AllCode():
307      if code.IsUsed():
308        yield code
309
310  def Print(self):
311    for code in self.AllCode():
312      print code
313
314  def Find(self, pc):
315    if pc < self.min_address or pc >= self.max_address:
316      return None
317    page_id = CodePage.PageId(pc)
318    if page_id not in self.pages:
319      return None
320    return self.pages[page_id].Find(pc)
321
322
323class CodeInfo(object):
324  """Generic info about generated code objects."""
325
326  def __init__(self, arch, header_size):
327    self.arch = arch
328    self.header_size = header_size
329
330
331class LogReader(object):
332  """V8 low-level (binary) log reader."""
333
334  _ARCH_TO_POINTER_TYPE_MAP = {
335    "ia32": ctypes.c_uint32,
336    "arm": ctypes.c_uint32,
337    "mips": ctypes.c_uint32,
338    "x64": ctypes.c_uint64,
339    "arm64": ctypes.c_uint64
340  }
341
342  _CODE_CREATE_TAG = "C"
343  _CODE_MOVE_TAG = "M"
344  _CODE_MOVING_GC_TAG = "G"
345
346  def __init__(self, log_name, code_map):
347    self.log_file = open(log_name, "r")
348    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
349    self.log_pos = 0
350    self.code_map = code_map
351
352    self.arch = self.log[:self.log.find("\0")]
353    self.log_pos += len(self.arch) + 1
354    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
355        "Unsupported architecture %s" % self.arch
356    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
357
358    self.code_create_struct = LogReader._DefineStruct([
359        ("name_size", ctypes.c_int32),
360        ("code_address", pointer_type),
361        ("code_size", ctypes.c_int32)])
362
363    self.code_move_struct = LogReader._DefineStruct([
364        ("from_address", pointer_type),
365        ("to_address", pointer_type)])
366
367    self.code_delete_struct = LogReader._DefineStruct([
368        ("address", pointer_type)])
369
370  def ReadUpToGC(self):
371    while self.log_pos < self.log.size():
372      tag = self.log[self.log_pos]
373      self.log_pos += 1
374
375      if tag == LogReader._CODE_MOVING_GC_TAG:
376        return
377
378      if tag == LogReader._CODE_CREATE_TAG:
379        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
380        self.log_pos += ctypes.sizeof(event)
381        start_address = event.code_address
382        end_address = start_address + event.code_size
383        name = self.log[self.log_pos:self.log_pos + event.name_size]
384        origin = JS_ORIGIN
385        self.log_pos += event.name_size
386        origin_offset = self.log_pos
387        self.log_pos += event.code_size
388        code = Code(name, start_address, end_address, origin, origin_offset)
389        conficting_code = self.code_map.Find(start_address)
390        if conficting_code:
391          if not (conficting_code.start_address == code.start_address and
392            conficting_code.end_address == code.end_address):
393            self.code_map.Remove(conficting_code)
394          else:
395            LogReader._HandleCodeConflict(conficting_code, code)
396            # TODO(vitalyr): this warning is too noisy because of our
397            # attempts to reconstruct code log from the snapshot.
398            # print >>sys.stderr, \
399            #     "Warning: Skipping duplicate code log entry %s" % code
400            continue
401        self.code_map.Add(code)
402        continue
403
404      if tag == LogReader._CODE_MOVE_TAG:
405        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
406        self.log_pos += ctypes.sizeof(event)
407        old_start_address = event.from_address
408        new_start_address = event.to_address
409        if old_start_address == new_start_address:
410          # Skip useless code move entries.
411          continue
412        code = self.code_map.Find(old_start_address)
413        if not code:
414          print >>sys.stderr, "Warning: Not found %x" % old_start_address
415          continue
416        assert code.start_address == old_start_address, \
417            "Inexact move address %x for %s" % (old_start_address, code)
418        self.code_map.Remove(code)
419        size = code.end_address - code.start_address
420        code.start_address = new_start_address
421        code.end_address = new_start_address + size
422        self.code_map.Add(code)
423        continue
424
425      assert False, "Unknown tag %s" % tag
426
427  def Dispose(self):
428    self.log.close()
429    self.log_file.close()
430
431  @staticmethod
432  def _DefineStruct(fields):
433    class Struct(ctypes.Structure):
434      _fields_ = fields
435    return Struct
436
437  @staticmethod
438  def _HandleCodeConflict(old_code, new_code):
439    assert (old_code.start_address == new_code.start_address and
440            old_code.end_address == new_code.end_address), \
441        "Conficting code log entries %s and %s" % (old_code, new_code)
442    if old_code.name == new_code.name:
443      return
444    # Code object may be shared by a few functions. Collect the full
445    # set of names.
446    old_code.AddName(new_code.name)
447
448
449class Descriptor(object):
450  """Descriptor of a structure in the binary trace log."""
451
452  CTYPE_MAP = {
453    "u16": ctypes.c_uint16,
454    "u32": ctypes.c_uint32,
455    "u64": ctypes.c_uint64
456  }
457
458  def __init__(self, fields):
459    class TraceItem(ctypes.Structure):
460      _fields_ = Descriptor.CtypesFields(fields)
461
462      def __str__(self):
463        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
464                         for field, _ in TraceItem._fields_)
465
466    self.ctype = TraceItem
467
468  def Read(self, trace, offset):
469    return self.ctype.from_buffer(trace, offset)
470
471  @staticmethod
472  def CtypesFields(fields):
473    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
474
475
476# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
477# for the gory details.
478
479
480# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
481TRACE_HEADER_DESC = Descriptor([
482  ("magic", "u64"),
483  ("size", "u64"),
484  ("attr_size", "u64"),
485  ("attrs_offset", "u64"),
486  ("attrs_size", "u64"),
487  ("data_offset", "u64"),
488  ("data_size", "u64"),
489  ("event_types_offset", "u64"),
490  ("event_types_size", "u64")
491])
492
493
494# Reference: /usr/include/linux/perf_event.h
495PERF_EVENT_ATTR_DESC = Descriptor([
496  ("type", "u32"),
497  ("size", "u32"),
498  ("config", "u64"),
499  ("sample_period_or_freq", "u64"),
500  ("sample_type", "u64"),
501  ("read_format", "u64"),
502  ("flags", "u64"),
503  ("wakeup_events_or_watermark", "u32"),
504  ("bp_type", "u32"),
505  ("bp_addr", "u64"),
506  ("bp_len", "u64")
507])
508
509
510# Reference: /usr/include/linux/perf_event.h
511PERF_EVENT_HEADER_DESC = Descriptor([
512  ("type", "u32"),
513  ("misc", "u16"),
514  ("size", "u16")
515])
516
517
518# Reference: kernel/tools/perf/util/event.h
519PERF_MMAP_EVENT_BODY_DESC = Descriptor([
520  ("pid", "u32"),
521  ("tid", "u32"),
522  ("addr", "u64"),
523  ("len", "u64"),
524  ("pgoff", "u64")
525])
526
527# Reference: kernel/tools/perf/util/event.h
528PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
529  ("pid", "u32"),
530  ("tid", "u32"),
531  ("addr", "u64"),
532  ("len", "u64"),
533  ("pgoff", "u64"),
534  ("maj", "u32"),
535  ("min", "u32"),
536  ("ino", "u64"),
537  ("ino_generation", "u64"),
538  ("prot", "u32"),
539  ("flags","u32")
540])
541
542# perf_event_attr.sample_type bits control the set of
543# perf_sample_event fields.
544PERF_SAMPLE_IP = 1 << 0
545PERF_SAMPLE_TID = 1 << 1
546PERF_SAMPLE_TIME = 1 << 2
547PERF_SAMPLE_ADDR = 1 << 3
548PERF_SAMPLE_READ = 1 << 4
549PERF_SAMPLE_CALLCHAIN = 1 << 5
550PERF_SAMPLE_ID = 1 << 6
551PERF_SAMPLE_CPU = 1 << 7
552PERF_SAMPLE_PERIOD = 1 << 8
553PERF_SAMPLE_STREAM_ID = 1 << 9
554PERF_SAMPLE_RAW = 1 << 10
555
556
557# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
558PERF_SAMPLE_EVENT_BODY_FIELDS = [
559  ("ip", "u64", PERF_SAMPLE_IP),
560  ("pid", "u32", PERF_SAMPLE_TID),
561  ("tid", "u32", PERF_SAMPLE_TID),
562  ("time", "u64", PERF_SAMPLE_TIME),
563  ("addr", "u64", PERF_SAMPLE_ADDR),
564  ("id", "u64", PERF_SAMPLE_ID),
565  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
566  ("cpu", "u32", PERF_SAMPLE_CPU),
567  ("res", "u32", PERF_SAMPLE_CPU),
568  ("period", "u64", PERF_SAMPLE_PERIOD),
569  # Don't want to handle read format that comes after the period and
570  # before the callchain and has variable size.
571  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
572  # Raw data follows the callchain and is ignored.
573]
574
575
576PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
577
578
579PERF_RECORD_MMAP = 1
580PERF_RECORD_MMAP2 = 10
581PERF_RECORD_SAMPLE = 9
582
583
584class TraceReader(object):
585  """Perf (linux-2.6/tools/perf) trace file reader."""
586
587  _TRACE_HEADER_MAGIC = 4993446653023372624
588
589  def __init__(self, trace_name):
590    self.trace_file = open(trace_name, "r")
591    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
592    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
593    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
594      print >>sys.stderr, "Warning: unsupported trace header magic"
595    self.offset = self.trace_header.data_offset
596    self.limit = self.trace_header.data_offset + self.trace_header.data_size
597    assert self.limit <= self.trace.size(), \
598        "Trace data limit exceeds trace file size"
599    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
600    assert self.trace_header.attrs_size != 0, \
601        "No perf event attributes found in the trace"
602    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
603                                                self.trace_header.attrs_offset)
604    self.sample_event_body_desc = self._SampleEventBodyDesc(
605        perf_event_attr.sample_type)
606    self.callchain_supported = \
607        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
608    if self.callchain_supported:
609      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
610      self.ip_size = ctypes.sizeof(self.ip_struct)
611
612  def ReadEventHeader(self):
613    if self.offset >= self.limit:
614      return None, 0
615    offset = self.offset
616    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
617    self.offset += header.size
618    return header, offset
619
620  def ReadMmap(self, header, offset):
621    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
622                                               offset + self.header_size)
623    # Read null-terminated filename.
624    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
625                          offset + header.size]
626    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
627    return mmap_info
628
629  def ReadMmap2(self, header, offset):
630    mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace,
631                                                offset + self.header_size)
632    # Read null-terminated filename.
633    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
634                          offset + header.size]
635    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
636    return mmap_info
637
638  def ReadSample(self, header, offset):
639    sample = self.sample_event_body_desc.Read(self.trace,
640                                              offset + self.header_size)
641    if not self.callchain_supported:
642      return sample
643    sample.ips = []
644    offset += self.header_size + ctypes.sizeof(sample)
645    for _ in xrange(sample.nr):
646      sample.ips.append(
647        self.ip_struct.from_buffer(self.trace, offset).value)
648      offset += self.ip_size
649    return sample
650
651  def Dispose(self):
652    self.trace.close()
653    self.trace_file.close()
654
655  def _SampleEventBodyDesc(self, sample_type):
656    assert (sample_type & PERF_SAMPLE_READ) == 0, \
657           "Can't hande read format in samples"
658    fields = [(field, format)
659              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
660              if (bit & sample_type) != 0]
661    return Descriptor(fields)
662
663
664OBJDUMP_SECTION_HEADER_RE = re.compile(
665  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
666OBJDUMP_SYMBOL_LINE_RE = re.compile(
667  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
668OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
669  r"^DYNAMIC SYMBOL TABLE")
670OBJDUMP_SKIP_RE = re.compile(
671  r"^.*ld\.so\.cache$")
672KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
673PERF_KERNEL_ALLSYMS_RE = re.compile(
674  r".*kallsyms.*")
675KERNEL_ALLSYMS_LINE_RE = re.compile(
676  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
677
678
679class LibraryRepo(object):
680  def __init__(self):
681    self.infos = []
682    self.names = set()
683    self.ticks = {}
684
685
686  def HasDynamicSymbols(self, filename):
687    if filename.endswith(".ko"): return False
688    process = subprocess.Popen(
689      "%s -h %s" % (OBJDUMP_BIN, filename),
690      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
691    pipe = process.stdout
692    try:
693      for line in pipe:
694        match = OBJDUMP_SECTION_HEADER_RE.match(line)
695        if match and match.group(1) == 'dynsym': return True
696    finally:
697      pipe.close()
698    assert process.wait() == 0, "Failed to objdump -h %s" % filename
699    return False
700
701
702  def Load(self, mmap_info, code_map, options):
703    # Skip kernel mmaps when requested using the fact that their tid
704    # is 0.
705    if mmap_info.tid == 0 and not options.kernel:
706      return True
707    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
708      return True
709    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
710      return self._LoadKernelSymbols(code_map)
711    self.infos.append(mmap_info)
712    mmap_info.ticks = 0
713    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
714    if not os.path.exists(mmap_info.filename):
715      return True
716    # Request section headers (-h), symbols (-t), and dynamic symbols
717    # (-T) from objdump.
718    # Unfortunately, section headers span two lines, so we have to
719    # keep the just seen section name (from the first line in each
720    # section header) in the after_section variable.
721    if self.HasDynamicSymbols(mmap_info.filename):
722      dynamic_symbols = "-T"
723    else:
724      dynamic_symbols = ""
725    process = subprocess.Popen(
726      "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
727      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
728    pipe = process.stdout
729    after_section = None
730    code_sections = set()
731    reloc_sections = set()
732    dynamic = False
733    try:
734      for line in pipe:
735        if after_section:
736          if line.find("CODE") != -1:
737            code_sections.add(after_section)
738          if line.find("RELOC") != -1:
739            reloc_sections.add(after_section)
740          after_section = None
741          continue
742
743        match = OBJDUMP_SECTION_HEADER_RE.match(line)
744        if match:
745          after_section = match.group(1)
746          continue
747
748        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
749          dynamic = True
750          continue
751
752        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
753        if match:
754          start_address = int(match.group(1), 16)
755          origin_offset = start_address
756          flags = match.group(2)
757          section = match.group(3)
758          if section in code_sections:
759            if dynamic or section in reloc_sections:
760              start_address += mmap_info.addr
761            size = int(match.group(4), 16)
762            name = match.group(5)
763            origin = mmap_info.filename
764            code_map.Add(Code(name, start_address, start_address + size,
765                              origin, origin_offset))
766    finally:
767      pipe.close()
768    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
769
770  def Tick(self, pc):
771    for i, mmap_info in enumerate(self.infos):
772      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
773        mmap_info.ticks += 1
774        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
775        return True
776    return False
777
778  def _UniqueMmapName(self, mmap_info):
779    name = mmap_info.filename
780    index = 1
781    while name in self.names:
782      name = "%s-%d" % (mmap_info.filename, index)
783      index += 1
784    self.names.add(name)
785    return name
786
787  def _LoadKernelSymbols(self, code_map):
788    if not os.path.exists(KERNEL_ALLSYMS_FILE):
789      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
790      return False
791    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
792    code = None
793    for line in kallsyms:
794      match = KERNEL_ALLSYMS_LINE_RE.match(line)
795      if match:
796        start_address = int(match.group(1), 16)
797        end_address = start_address
798        name = match.group(2)
799        if code:
800          code.end_address = start_address
801          code_map.Add(code, 16)
802        code = Code(name, start_address, end_address, "kernel", 0)
803    return True
804
805
806def PrintReport(code_map, library_repo, arch, ticks, options):
807  print "Ticks per symbol:"
808  used_code = [code for code in code_map.UsedCode()]
809  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
810  for i, code in enumerate(used_code):
811    code_ticks = code.self_ticks
812    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
813                                    code.FullName(), code.origin)
814    if options.disasm_all or i < options.disasm_top:
815      code.PrintAnnotated(arch, options)
816  print
817  print "Ticks per library:"
818  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
819  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
820  for mmap_info in mmap_infos:
821    mmap_ticks = mmap_info.ticks
822    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
823                               mmap_info.unique_name)
824
825
826def PrintDot(code_map, options):
827  print "digraph G {"
828  for code in code_map.UsedCode():
829    if code.self_ticks < 10:
830      continue
831    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
832    if code.callee_ticks:
833      for callee, ticks in code.callee_ticks.iteritems():
834        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
835  print "}"
836
837
838if __name__ == "__main__":
839  parser = optparse.OptionParser(USAGE)
840  parser.add_option("--log",
841                    default="v8.log",
842                    help="V8 log file name [default: %default]")
843  parser.add_option("--trace",
844                    default="perf.data",
845                    help="perf trace file name [default: %default]")
846  parser.add_option("--kernel",
847                    default=False,
848                    action="store_true",
849                    help="process kernel entries [default: %default]")
850  parser.add_option("--disasm-top",
851                    default=0,
852                    type="int",
853                    help=("number of top symbols to disassemble and annotate "
854                          "[default: %default]"))
855  parser.add_option("--disasm-all",
856                    default=False,
857                    action="store_true",
858                    help=("disassemble and annotate all used symbols "
859                          "[default: %default]"))
860  parser.add_option("--dot",
861                    default=False,
862                    action="store_true",
863                    help="produce dot output (WIP) [default: %default]")
864  parser.add_option("--quiet", "-q",
865                    default=False,
866                    action="store_true",
867                    help="no auxiliary messages [default: %default]")
868  parser.add_option("--gc-fake-mmap",
869                    default="/tmp/__v8_gc__",
870                    help="gc fake mmap file [default: %default]")
871  parser.add_option("--objdump",
872                    default="/usr/bin/objdump",
873                    help="objdump tool to use [default: %default]")
874  parser.add_option("--host-root",
875                    default="",
876                    help="Path to the host root [default: %default]")
877  options, args = parser.parse_args()
878
879  if not options.quiet:
880    print "V8 log: %s, %s.ll" % (options.log, options.log)
881    print "Perf trace file: %s" % options.trace
882
883  V8_GC_FAKE_MMAP = options.gc_fake_mmap
884  HOST_ROOT = options.host_root
885  if os.path.exists(options.objdump):
886    disasm.OBJDUMP_BIN = options.objdump
887    OBJDUMP_BIN = options.objdump
888  else:
889    print "Cannot find %s, falling back to default objdump" % options.objdump
890
891  # Stats.
892  events = 0
893  ticks = 0
894  missed_ticks = 0
895  really_missed_ticks = 0
896  optimized_ticks = 0
897  generated_ticks = 0
898  v8_internal_ticks = 0
899  mmap_time = 0
900  sample_time = 0
901
902  # Initialize the log reader.
903  code_map = CodeMap()
904  log_reader = LogReader(log_name=options.log + ".ll",
905                         code_map=code_map)
906  if not options.quiet:
907    print "Generated code architecture: %s" % log_reader.arch
908    print
909    sys.stdout.flush()
910
911  # Process the code and trace logs.
912  library_repo = LibraryRepo()
913  log_reader.ReadUpToGC()
914  trace_reader = TraceReader(options.trace)
915  while True:
916    header, offset = trace_reader.ReadEventHeader()
917    if not header:
918      break
919    events += 1
920    if header.type == PERF_RECORD_MMAP:
921      start = time.time()
922      mmap_info = trace_reader.ReadMmap(header, offset)
923      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
924        log_reader.ReadUpToGC()
925      else:
926        library_repo.Load(mmap_info, code_map, options)
927      mmap_time += time.time() - start
928    elif header.type == PERF_RECORD_MMAP2:
929      start = time.time()
930      mmap_info = trace_reader.ReadMmap2(header, offset)
931      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
932        log_reader.ReadUpToGC()
933      else:
934        library_repo.Load(mmap_info, code_map, options)
935      mmap_time += time.time() - start
936    elif header.type == PERF_RECORD_SAMPLE:
937      ticks += 1
938      start = time.time()
939      sample = trace_reader.ReadSample(header, offset)
940      code = code_map.Find(sample.ip)
941      if code:
942        code.Tick(sample.ip)
943        if code.codetype == Code.OPTIMIZED:
944          optimized_ticks += 1
945        elif code.codetype == Code.FULL_CODEGEN:
946          generated_ticks += 1
947        elif code.codetype == Code.V8INTERNAL:
948          v8_internal_ticks += 1
949      else:
950        missed_ticks += 1
951      if not library_repo.Tick(sample.ip) and not code:
952        really_missed_ticks += 1
953      if trace_reader.callchain_supported:
954        for ip in sample.ips:
955          caller_code = code_map.Find(ip)
956          if caller_code:
957            if code:
958              caller_code.CalleeTick(code)
959            code = caller_code
960      sample_time += time.time() - start
961
962  if options.dot:
963    PrintDot(code_map, options)
964  else:
965    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
966
967    if not options.quiet:
968      def PrintTicks(number, total, description):
969        print("%10d %5.1f%% ticks in %s" %
970              (number, 100.0*number/total, description))
971      print
972      print "Stats:"
973      print "%10d total trace events" % events
974      print "%10d total ticks" % ticks
975      print "%10d ticks not in symbols" % missed_ticks
976      unaccounted = "unaccounted ticks"
977      if really_missed_ticks > 0:
978        unaccounted += " (probably in the kernel, try --kernel)"
979      PrintTicks(really_missed_ticks, ticks, unaccounted)
980      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
981      PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
982      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
983      print "%10d total symbols" % len([c for c in code_map.AllCode()])
984      print "%10d used symbols" % len([c for c in code_map.UsedCode()])
985      print "%9.2fs library processing time" % mmap_time
986      print "%9.2fs tick processing time" % sample_time
987
988  log_reader.Dispose()
989  trace_reader.Dispose()
990