1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Generates a human-interpretable view of a native heap dump from 'am dumpheap -n'."""
18
19import logging
20import os
21import os.path
22import re
23import subprocess
24import sys
25import zipfile
26
class Args:
  """Command line options of the viewer, parsed from sys.argv on construction.

  Attributes:
    verbose: True if --verbose was given.
    html_output: True if --html was given.
    reverse_frames: True if --reverse was given.
    symboldir: Prefix prepended to a library path to find the .so with symbols.
      Defaults to $ANDROID_PRODUCT_OUT/symbols, or ./symbols if that
      environment variable is unset or empty.
    app_symboldir: Prefix used to find the app APK and its .so files.
      Defaults to the empty string.
    native_heap: The single positional argument, the heap dump to read.
  """

  _usage = """
Usage:
1. Collect a native heap dump from the device. For example:
   $ adb shell stop
   $ adb shell setprop libc.debug.malloc.program app_process
   $ adb shell setprop libc.debug.malloc.options backtrace=64
   $ adb shell start
    (launch and use app)
   $ adb shell am dumpheap -n <pid> /data/local/tmp/native_heap.txt
   $ adb pull /data/local/tmp/native_heap.txt

2. Run the viewer:
   $ python native_heapdump_viewer.py [options] native_heap.txt
      [--verbose]: verbose output
      [--html]: interactive html output
      [--reverse]: reverse the backtraces (start the tree from the leaves)
      [--symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the .so files with symbols.
                 Defaults to $ANDROID_PRODUCT_OUT/symbols
      [--app-symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the app APK and so files.
                 Defaults to the current directory.
   This outputs a file with lines of the form:

      5831776  29.09% 100.00%    10532     71b07bc0b0 /system/lib64/libandroid_runtime.so Typeface_createFromArray frameworks/base/core/jni/android/graphics/Typeface.cpp:68

   5831776 is the total number of bytes allocated at this stack frame, which
   is 29.09% of the total number of bytes allocated and 100.00% of the parent
   frame's bytes allocated. 10532 is the total number of allocations at this
   stack frame. 71b07bc0b0 is the address of the stack frame.
"""

  def __init__(self):
    """Parse sys.argv[1:] into attributes.

    Unknown options (anything starting with '-') are reported on stdout and
    skipped. The usage text is printed and the process exits with status 1
    when --symbols/--app-symbols is missing its directory value, or when the
    number of positional arguments is not exactly one.
    """
    self.verbose = False
    self.html_output = False
    self.reverse_frames = False
    product_out = os.getenv("ANDROID_PRODUCT_OUT")
    if product_out:
      self.symboldir = product_out + "/symbols"
    else:
      self.symboldir = "./symbols"
    self.app_symboldir = ""

    argv = sys.argv
    extra_args = []
    i = 1
    while i < len(argv):
      arg = argv[i]
      if arg == "--symbols" or arg == "--app-symbols":
        i += 1
        if i >= len(argv):
          # The option was the last argument, so its value is missing.
          # Report it instead of failing with an IndexError.
          print("Option %s requires a directory argument" % arg)
          print(self._usage)
          sys.exit(1)
        if arg == "--symbols":
          self.symboldir = argv[i] + "/"
        else:
          self.app_symboldir = argv[i] + "/"
      elif arg == "--verbose":
        self.verbose = True
      elif arg == "--html":
        self.html_output = True
      elif arg == "--reverse":
        self.reverse_frames = True
      elif arg.startswith("-"):
        # startswith() is safe for an empty-string argument, unlike arg[0].
        print("Invalid option %s" % (arg))
      else:
        extra_args.append(arg)
      i += 1

    if len(extra_args) != 1:
      print(self._usage)
      sys.exit(1)

    self.native_heap = extra_args[0]
95
class Backtrace:
  """One allocation record: its zygote flag, size, count and call stack frames."""

  def __init__(self, is_zygote, size, num_allocs, frames):
    # Plain value holder: the arguments are stored as given, nothing is
    # validated or copied.
    (self.is_zygote, self.size, self.num_allocs, self.frames) = (
        is_zygote, size, num_allocs, frames)
102
class Mapping:
  """A mapped address range [start, end) backed by file `name` at `offset`."""

  def __init__(self, start, end, offset, name):
    # Address range covered by the mapping.
    self.start, self.end = start, end
    # Offset within the backing file, and the backing file's name.
    self.offset, self.name = offset, name
109
class FrameDescription:
  """Symbolized view of one stack frame: function, source location and library."""

  def __init__(self, function, location, library):
    # Stored verbatim; callers also pass placeholder strings here when a
    # frame could not be symbolized.
    (self.function, self.location, self.library) = (function, location, library)
115
def GetVersion(native_heap):
  """Get the version of the native heap dump.

  Args:
    native_heap: Path of the native heap dump file.

  Returns:
    The version string (for example "v1.1") from the first line that starts
    with an "Android Native Heap Dump vX.Y" header, or None if no line of the
    file matches.
  """

  # Raw string so \s and \d reach the regex engine instead of being treated
  # as (invalid) string escape sequences.
  re_line = re.compile(r"Android\s+Native\s+Heap\s+Dump\s+(?P<version>v\d+\.\d+)\s*$")
  with open(native_heap, "r") as f:
    for line in f:
      m = re_line.match(line)
      if m:
        return m.group('version')
  return None
127
def GetNumFieldValidByParsingLines(native_heap):
  """Determine if the num field is valid by parsing the backtrace lines.

  Malloc debug for N incorrectly set the num field to the number of
  backtraces instead of the number of allocations with the same size and
  backtrace. Read the file and if at least three lines all have the field
  set to the number of backtraces values, then consider this generated by
  the buggy malloc debug and indicate the num field is not valid.

  Args:
    native_heap: Path of the native heap dump file.

  Returns:
    True if the num field is valid.
    False if the num field is not valid and should be ignored.
  """

  # Raw strings so \s and \d reach the regex engine instead of being treated
  # as (invalid) string escape sequences.
  re_backtrace = re.compile(r"Backtrace\s+size:\s+(?P<backtrace_size>\d+)")

  re_line = re.compile(
      r"z\s+(?P<zygote>\d+)\s+sz\s+(?P<size>\d+)\s+num\s+(?P<num_allocations>\d+)")
  matched = 0
  backtrace_size = 0
  with open(native_heap, "r") as f:
    for line in f:
      # Pick up the "Backtrace size: N" header value until one is found.
      if backtrace_size == 0:
        m = re_backtrace.match(line)
        if m:
          backtrace_size = int(m.group('backtrace_size'))
      parts = line.split()
      if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
        m = re_line.match(line)
        if m:
          num_allocations = int(m.group('num_allocations'))
          if num_allocations == backtrace_size:
            # At least three lines must match this pattern before
            # considering this the old buggy version of malloc debug.
            matched += 1
            if matched == 3:
              return False
          else:
            # A single record whose num differs from the backtrace size is
            # enough to trust the field.
            return True
  # Reached the end of the file without a decisive record: the field is
  # treated as valid only if no suspicious record was seen at all.
  return matched == 0
167
def GetNumFieldValid(native_heap):
  """Return True if the num field of the dump's backtrace lines can be trusted.

  Dumps that report a version other than v1.0 are always trusted. Dumps with
  no version header, or with version v1.0, are inspected line by line.
  """
  version = GetVersion(native_heap)
  if version and version != "v1.0":
    return True

  # Version v1.0 was produced by a buggy version of malloc debug where the
  # num field was set incorrectly.
  # Unfortunately, Android P produced a v1.0 version that does set the
  # num field. Do one more check to see if this is the broken version.
  return GetNumFieldValidByParsingLines(native_heap)
178
def GetMappingFromOffset(mapping, app_symboldir):
  """Redirect a mapping of a zip file (APK) to the stored file it points into.

  If a zip file named after the basename of mapping.name exists under
  app_symboldir, and mapping.offset falls inside one of its uncompressed
  (stored) entries, mapping.name is replaced by that entry's name and
  mapping.offset is made relative to the entry's page-aligned data start.

  This is done to handle symbols for the uncompressed .so files inside APKs.
  With the replaced mappings, the script looks up the .so files as separate
  files.

  Args:
    mapping: Object with name and offset attributes; modified in place.
    app_symboldir: Prefix prepended to the mapping's basename to locate the zip.

  Returns:
    The same mapping object, modified or not.
  """
  basename = os.path.basename(mapping.name)
  zip_name = app_symboldir + basename
  # is_zipfile() keeps a regular non-zip file that merely shares the basename
  # (for example a plain .so) from aborting the run with BadZipfile.
  if not (os.path.isfile(zip_name) and zipfile.is_zipfile(zip_name)):
    return mapping

  zip_header_entry_size = 30
  so_file_alignment = 4096
  # The context manager closes the archive even when the loop exits early.
  with zipfile.ZipFile(zip_name) as opened_zip:
    # For all files in the zip, look for the entry containing the offset.
    for file_info in opened_zip.infolist():
      # Only consider stored files since it doesn't make sense to have PC
      # into compressed ones.
      if file_info.compress_type != zipfile.ZIP_STORED:
        continue
      # NOTE(review): the header size is estimated from the ZipInfo metadata
      # (name, extra and comment lengths); confirm this matches the local
      # file header for the APKs in use.
      data_offset = (file_info.header_offset
          + zip_header_entry_size
          + len(file_info.filename)
          + len(file_info.extra)
          + len(file_info.comment))
      end_offset = data_offset + file_info.file_size
      if data_offset <= mapping.offset < end_offset:
        # Round up the data_offset to the nearest page since the .so must
        # be aligned.
        data_offset += so_file_alignment - 1
        data_offset -= data_offset % so_file_alignment
        mapping.name = file_info.filename
        mapping.offset -= data_offset
        break
  return mapping
212
def ParseNativeHeap(native_heap, reverse_frames, num_field_valid, app_symboldir):
  """Parse the native heap into backtraces, maps.

  Args:
    native_heap: Path of the native heap dump file.
    reverse_frames: If true, store each backtrace's frames in reverse order.
    num_field_valid: If true, take the allocation count from the num field;
      otherwise count every backtrace line as a single allocation.
    app_symboldir: Passed to GetMappingFromOffset to resolve mappings that
      point into an APK.

  Returns:
    Two lists, the first is a list of all of the backtraces, the second is
    the list of maps sorted by start address.
  """

  backtraces = []
  mappings = []

  re_map = re.compile("(?P<start>[0-9a-f]+)-(?P<end>[0-9a-f]+) .... (?P<offset>[0-9a-f]+) [0-9a-f]+:[0-9a-f]+ [0-9]+ +(?P<name>.*)")

  with open(native_heap, "r") as f:
    for line in f:
      # Format of line:
      #   z 0  sz       50  num    1  bt 000000000000a100 000000000000b200
      parts = line.split()
      if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
        is_zygote = parts[1] != "1"
        size = int(parts[3])
        # The num field is only parsed when it is trusted.
        num_allocs = int(parts[5]) if num_field_valid else 1
        frames = [int(frame, 16) for frame in parts[7:]]
        if reverse_frames:
          frames.reverse()
        backtraces.append(Backtrace(is_zygote, size, num_allocs, frames))
        continue

      # Parse map line:
      #   720de01000-720ded7000 r-xp 00000000 fd:00 495  /system/lib64/libc.so
      m = re_map.match(line)
      if m:
        # Offset of mapping start
        start = int(m.group('start'), 16)
        # Offset of mapping end
        end = int(m.group('end'), 16)
        # Offset within file that is mapped
        offset = int(m.group('offset'), 16)
        name = m.group('name')
        mappings.append(GetMappingFromOffset(Mapping(start, end, offset, name), app_symboldir))

  # The mappings are binary-searched by address later on, so guarantee the
  # documented ordering instead of relying on the order of lines in the dump.
  # The sort is stable, so an already sorted list is left unchanged.
  mappings.sort(key=lambda mapping: mapping.start)
  return backtraces, mappings
255
def FindMapping(mappings, addr):
  """Find the mapping given addr.

  Binary search over mappings, which must be sorted by address and must not
  overlap. A mapping contains addr when start <= addr < end.

  Args:
    mappings: Sorted list of objects with start and end attributes.
    addr: Address to look up.

  Returns:
    The mapping that contains addr, or None if there is no such mapping.
  """

  # Named low/high rather than min/max so the builtins are not shadowed.
  low = 0
  high = len(mappings) - 1
  while low <= high:
    mid = (low + high) // 2
    candidate = mappings[mid]
    if candidate.end <= addr:
      low = mid + 1
    elif candidate.start > addr:
      high = mid - 1
    else:
      return candidate
  return None
275
276
def ResolveAddrs(html_output, symboldir, app_symboldir, backtraces, mappings):
  """Resolve every backtrace address to a function, location and library.

  Each address is looked up in mappings and grouped by the library it falls
  in. For every library whose file is found under app_symboldir or symboldir,
  objdump is run to get the file offset of the .text section and addr2line
  is run once to symbolize all of that library's addresses.

  Internally:
    addr_offsets maps addr to .so file offset
    addrs_by_lib maps library to list of addrs from that library
    resolved_addrs maps addr to FrameDescription

  Args:
    html_output: If true, the progress messages are not printed to stdout.
    symboldir: Prefix prepended to the library path to find the symbol file.
    app_symboldir: Prefix tried first, before symboldir.
    backtraces: Objects whose frames attribute is an iterable of addresses.
    mappings: Sorted list of mappings, as expected by FindMapping.

  Returns:
    The resolved_addrs hash. Addresses outside any mapping resolve to
    ("???", "???", "???"), addresses in a library whose file is missing to
    ("???", "???", lib), and addresses for which addr2line printed too few
    lines to ("---", "---", lib).

  Raises:
    subprocess.CalledProcessError: If objdump exits with a non-zero status.
    OSError: If objdump or addr2line cannot be executed.
  """

  addr_offsets = {}
  addrs_by_lib = {}
  resolved_addrs = {}
  empty_frame_description = FrameDescription("???", "???", "???")
  for backtrace in backtraces:
    for addr in backtrace.frames:
      if addr in addr_offsets:
        continue
      mapping = FindMapping(mappings, addr)
      if mapping:
        addr_offsets[addr] = addr - mapping.start + mapping.offset
        addrs_by_lib.setdefault(mapping.name, []).append(addr)
      else:
        resolved_addrs[addr] = empty_frame_description

  # Resolve functions and line numbers.
  if not html_output:
    print("Resolving symbols using directory %s..." % symboldir)

  for lib, lib_addrs in addrs_by_lib.items():
    sofile = app_symboldir + lib
    if not os.path.isfile(sofile):
      sofile = symboldir + lib
    if not os.path.isfile(sofile):
      if not html_output:
        print("%s not found for symbol resolution" % lib)

      fd = FrameDescription("???", "???", lib)
      for addr in lib_addrs:
        resolved_addrs[addr] = fd
      continue

    # universal_newlines=True makes the pipes text streams, so the output is
    # str (not bytes) on Python 3 and can be split on "\n".
    file_offset = 0
    result = subprocess.check_output(
        ["objdump", "-w", "-j", ".text", "-h", sofile], universal_newlines=True)
    for line in result.split("\n"):
      splitted = line.split()
      if len(splitted) > 5 and splitted[1] == ".text":
        file_offset = int(splitted[5], 16)
        break

    # addr2line is invoked with "-j .text", so the addresses fed to it are
    # made relative to the start of that section.
    input_addrs = "".join(
        "%s\n" % hex(addr_offsets[addr] - file_offset) for addr in lib_addrs)

    p = subprocess.Popen(
        ["addr2line", "-C", "-j", ".text", "-e", sofile, "-f"],
        stdout=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
    result = p.communicate(input_addrs)[0]
    addr2line_rc = p.returncode
    if addr2line_rc and (addr2line_rc < 0):
      logging.warning("addr2line on %s terminated by signal %d", sofile, -addr2line_rc)

    # With -f, addr2line prints two lines per address: function, location.
    splitted = result.split("\n")
    for x, addr in enumerate(lib_addrs):
      try:
        function = splitted[2 * x]
        location = splitted[2 * x + 1]
      except IndexError:
        # addr2line produced fewer lines than expected (it may have died).
        logging.warning("exception while resolving symbols", exc_info=True)
        resolved_addrs[addr] = FrameDescription("---", "---", lib)
      else:
        resolved_addrs[addr] = FrameDescription(function, location, lib)

  return resolved_addrs
348
def Addr2Line(resolved_addrs, addr):
  """Look up the FrameDescription for a tree node's address label.

  The root labels "ZYGOTE" and "APP" yield a description made of empty
  strings. Any other label is parsed as a hexadecimal number and looked up
  in resolved_addrs.
  """
  if addr in ("ZYGOTE", "APP"):
    return FrameDescription("", "", "")

  key = int(addr, 16)
  return resolved_addrs[key]
354
class AddrInfo:
  """A node of the allocation tree, keyed by its address label."""

  def __init__(self, addr):
    self.addr = addr
    # Byte total, allocation count and (unused here) num_allocs start at zero.
    self.size = self.number = self.num_allocs = 0
    # Child nodes, keyed by the child's addr.
    self.children = {}

  def addStack(self, size, num_allocs, stack):
    """Account size * num_allocs bytes to this node and down the given stack.

    stack is a list of AddrInfo nodes; at each level the node already stored
    under the same addr is reused, otherwise the given node is inserted.
    """
    total_bytes = size * num_allocs
    node = self
    node.size += total_bytes
    node.number += num_allocs
    for frame in stack:
      # Descend into the existing child for this addr, or adopt the new one.
      node = node.children.setdefault(frame.addr, frame)
      node.size += total_bytes
      node.number += num_allocs
371
def Display(resolved_addrs, indent, total, parent_total, node):
  """Print node and, recursively, its children as indented text lines.

  Each line shows the node's bytes, its share of total and of parent_total,
  its allocation count, and the symbolized frame. Children are printed
  largest first, indented two more spaces, with this node's size as their
  parent total.
  """
  frame = Addr2Line(resolved_addrs, node.addr)
  # A zero denominator yields 0 instead of a division error.
  total_percent = 100 * node.size / float(total) if total != 0 else 0
  parent_percent = 100 * node.size / float(parent_total) if parent_total != 0 else 0
  columns = (node.size, total_percent, parent_percent, node.number, indent, node.addr,
             frame.library, frame.function, frame.location)
  print("%9d %6.2f%% %6.2f%% %8d %s%s %s %s %s" % columns)
  for child in sorted(node.children.values(), key=lambda x: x.size, reverse=True):
    Display(resolved_addrs, indent + "  ", total, node.size, child)
384
def DisplayHtml(verbose, resolved_addrs, total, node, extra, label_count):
  """Print node and its subtree as nested HTML list items.

  A node with children becomes a label plus a checkbox and a nested list;
  label_count supplies the checkbox id and is incremented for each such
  node. A leaf is printed as plain text inside its list item.

  Returns:
    The next unused label_count.
  """
  frame = Addr2Line(resolved_addrs, node.addr)
  # Only verbose output keeps the full library path.
  lib = frame.library if verbose else os.path.basename(frame.library)
  total_percent = 100 * node.size / float(total) if total != 0 else 0
  label = "%d %6.2f%% %6d %s%s %s %s" % (node.size, total_percent, node.number, extra, lib, frame.function, frame.location)
  # Escape HTML special characters; "&" must go first so the entities
  # produced by the later replacements are not escaped again.
  for raw, entity in (("&", "&amp;"), ("'", "&apos;"), ('"', "&quot;"), ("<", "&lt;"), (">", "&gt;")):
    label = label.replace(raw, entity)
  children = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
  print('<li>')
  if children:
    node_id = str(label_count)
    print('<label for="' + node_id + '">' + label + '</label>')
    print('<input type="checkbox" id="' + node_id + '"/>')
    print('<ol>')
    label_count += 1
    for child in children:
      label_count = DisplayHtml(verbose, resolved_addrs, total, child, "", label_count)
    print('</ol>')
  else:
    print(label)
  print('</li>')

  return label_count
415
def CreateHtml(verbose, app, zygote, resolved_addrs):
  """Print the whole HTML page: header and styles, then the allocation trees.

  The app tree is always printed. The zygote tree is printed only when it
  accounts for at least one byte, continuing the label numbering of the app
  tree.
  """
  print("""
<!DOCTYPE html>
<html><head><style>
li input {
    display: none;
}
li input:checked + ol > li {
    display: block;
}
li input + ol > li {
    display: none;
}
li {
    font-family: Roboto Mono,monospace;
}
label {
    font-family: Roboto Mono,monospace;
    cursor: pointer
}
</style></head><body>Native allocation HTML viewer<br><br>
Click on an individual line to expand/collapse to see the details of the
allocation data<ol>
""")

  # Label ids start at 0; the zygote tree picks up where the app tree ended.
  next_label = DisplayHtml(verbose, resolved_addrs, app.size, app, "app ", 0)
  if zygote.size > 0:
    DisplayHtml(verbose, resolved_addrs, zygote.size, zygote, "zygote ", next_label)
  print("</ol></body></html>")
446
def main():
  """Parse the command line, read the heap dump and print the allocation trees."""
  args = Args()

  backtraces, mappings = ParseNativeHeap(
      args.native_heap, args.reverse_frames, GetNumFieldValid(args.native_heap),
      args.app_symboldir)
  # Resolve functions and line numbers
  resolved_addrs = ResolveAddrs(
      args.html_output, args.symboldir, args.app_symboldir, backtraces, mappings)

  app = AddrInfo("APP")
  zygote = AddrInfo("ZYGOTE")

  # Build one tree per root; each backtrace is added with its frames in
  # reverse of their stored order.
  for backtrace in backtraces:
    stack = [AddrInfo("%x" % addr) for addr in reversed(backtrace.frames)]
    root = zygote if backtrace.is_zygote else app
    root.addStack(backtrace.size, backtrace.num_allocs, stack)

  if args.html_output:
    CreateHtml(args.verbose, app, zygote, resolved_addrs)
    return

  combined_size = app.size + zygote.size
  print("")
  print("%9s %6s %6s %8s    %s %s %s %s" % ("BYTES", "%TOTAL", "%PARENT", "COUNT", "ADDR", "LIBRARY", "FUNCTION", "LOCATION"))
  Display(resolved_addrs, "", app.size, combined_size, app)
  print("")
  Display(resolved_addrs, "", zygote.size, combined_size, zygote)
  print("")
480
# Run the viewer only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
483