1#!/usr/bin/env python3
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import atexit
23import json
24import glob
25import os
26import platform
27import re
28import shutil
29import signal
30import subprocess
31import unittest
32
# Root of the Android source tree, taken from the ANDROID_BUILD_TOP environment
# variable; falls back to the current directory when the variable is not set.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")
34
35
def FindClangDir():
  """Locate the prebuilt clang directory that matches this source tree.

  Returns:
    The versioned clang directory under prebuilts/clang/host/linux-x86, or
    None when the get_clang_version.py helper script does not exist.

  Raises:
    subprocess.CalledProcessError: the helper script exists but exits with a
      non-zero status.
  """
  version_script = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
  if not os.path.exists(version_script):
    return None

  # A failure of an existing script is deliberately allowed to propagate:
  # the script being present but unable to report a version is an error.
  clang_version = subprocess.check_output(version_script, text=True).strip()
  return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + clang_version
45
46
def FindSymbolsDir():
  """Ask the build system where unstripped binaries are written.

  Runs soong_ui.bash from ANDROID_BUILD_TOP and reports the value it prints
  for TARGET_OUT_UNSTRIPPED.

  The command is run with cwd= instead of os.chdir() so the working directory
  of this process is never changed, and subprocess.run() waits for the child
  so no defunct process is left behind.

  Returns:
    The command's standard output with surrounding whitespace removed. This
    is an empty string when the command prints nothing (the exit status is
    intentionally not checked).
  """
  # A fixed command line; the shell is kept so that a missing script only
  # produces an error message on stderr instead of raising here.
  cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
  completed = subprocess.run(cmd,
                             cwd=ANDROID_BUILD_TOP,
                             stdout=subprocess.PIPE,
                             universal_newlines=True,
                             shell=True)
  return completed.stdout.strip()
59
# Directory holding unstripped binaries, as reported by FindSymbolsDir().
# Computed once, when this module is imported.
SYMBOLS_DIR = FindSymbolsDir()

# Bitness guess for the trace being processed: None until SetBitness() runs,
# afterwards True (32 bit) or False (64 bit).
ARCH_IS_32BIT = None

# NOTE(review): not read anywhere in this file; presumably set by callers.
VERBOSE = False

# These are private. Do not access them from other modules.
# Memoized results of FindToolchain() and of the llvm-cxxfilt lookup in
# CallCppFilt().
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
# The first two map lib -> {addr: result}; the last maps a mangled symbol to
# its demangled form.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
_SYMBOL_DEMANGLING_CACHE = {}
74
75# Caches for pipes to subprocesses.
76
class ProcessCache:
  """A small LRU cache of running helper subprocesses, keyed by command line.

  GetProcess() returns an already running process for a command when there is
  one and spawns it otherwise. At most _PIPE_MAX_OPEN processes are kept; the
  least recently used one is terminated to make room for a new one.
  """

  # Max number of open pipes.
  _PIPE_MAX_OPEN = 10

  def __init__(self):
    # State is per instance. Keeping these as class attributes would make
    # every ProcessCache share (and mutate) the same dictionary.
    self._cmd2pipe = {}  # command tuple -> subprocess.Popen
    self._lru = []       # [(command tuple, Popen)], most recently used first

  def GetProcess(self, cmd):
    """Return a running process for cmd, spawning one if necessary.

    Args:
      cmd: the command line as a sequence of strings.

    Returns:
      The subprocess.Popen object serving this command.
    """
    cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.

    # Pipe already available?
    pipe = self._cmd2pipe.get(cmd_tuple)
    if pipe is not None:
      # Update LRU: move this entry to the front.
      self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
      return pipe

    # Not cached, yet. Check if too many are open, close the old ones.
    while len(self._lru) >= self._PIPE_MAX_OPEN:
      open_cmd, open_pipe = self._lru.pop()
      del self._cmd2pipe[open_cmd]
      self.TerminateProcess(open_pipe)

    # Create and put into cache.
    pipe = self.SpawnProcess(cmd)
    self._cmd2pipe[cmd_tuple] = pipe
    self._lru.insert(0, (cmd_tuple, pipe))
    return pipe

  def SpawnProcess(self, cmd):
    """Start cmd with text-mode pipes attached to its stdin and stdout."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)

  def TerminateProcess(self, pipe):
    """Close the pipes of a process, terminate it and wait for it to exit."""
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.terminate()
    pipe.wait()

  def KillAllProcesses(self):
    """Terminate every cached process and empty the cache."""
    for _, open_pipe in self._lru:
      self.TerminateProcess(open_pipe)
    # Reset the instance state (not locals) so that a later GetProcess() does
    # not hand out a process that was just terminated.
    self._cmd2pipe = {}
    self._lru = []
121
122
# One process cache per tool: the first holds the llvm-symbolizer processes
# started by CallLlvmSymbolizerForSet(), the second the llvm-cxxfilt process
# started by CallCppFilt().
_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()
125
126
127# Process cache cleanup on shutdown.
128
def CloseAllPipes():
  """Terminate every helper subprocess held by the module's process caches."""
  for cache in (_PIPE_ADDR2LINE_CACHE, _PIPE_CPPFILT_CACHE):
    cache.KillAllProcesses()
132
133
# Terminate the cached helper processes on normal interpreter shutdown.
atexit.register(CloseAllPipes)
135
136
def PipeTermHandler(signum, frame):
  """Signal handler: terminate all cached subprocesses, then exit at once.

  Args:
    signum: number of the signal being handled (unused).
    frame: the interrupted stack frame (unused).
  """
  CloseAllPipes()
  # os._exit() leaves immediately without running the atexit handlers, so
  # CloseAllPipes() is not invoked a second time. The exit status is 0
  # regardless of which signal arrived.
  os._exit(0)


# Route the termination signals below through PipeTermHandler.
for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
  signal.signal(sig, PipeTermHandler)
144
145
146
147
def ToolPath(tool, toolchain=None):
  """Resolve the command to use for running a tool.

  Args:
    tool: name of the executable.
    toolchain: optional directory to look in when the tool is not on PATH;
      FindToolchain() is consulted when it is not given.

  Returns:
    The bare tool name when it can be found on PATH, otherwise the tool name
    joined onto the toolchain directory.
  """
  found_on_path = shutil.which(tool)
  if found_on_path:
    return tool

  base_dir = toolchain or FindToolchain()
  return os.path.join(base_dir, tool)
155
156
def FindToolchain():
  """Return the directory of the prebuilt llvm binutils.

  The directory is looked up once and remembered in _CACHED_TOOLCHAIN; the
  first successful lookup also prints the chosen directory.

  Raises:
    Exception: the llvm-binutils-stable directory does not exist.
  """
  global _CACHED_TOOLCHAIN

  if not _CACHED_TOOLCHAIN:
    candidate = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(candidate):
      raise Exception("Could not find llvm tool chain directory %s" % (candidate))
    _CACHED_TOOLCHAIN = candidate
    print("Using toolchain from:", _CACHED_TOOLCHAIN)

  return _CACHED_TOOLCHAIN
171
172
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Args:
    lib: pathname of the library (or executable) containing symbols
    addr: the address, as a hexadecimal string

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples. It is never empty: when nothing is known about the address it is
    [(None, None, None)].

    For inlined code the list can hold several entries, the most deeply
    nested inlined location first. The source_location and
    object_symbol_with_offset of the last entry are usually the ones to
    display.
  """
  info = SymbolInformationForSet(lib, set([addr]))
  if info:
    entries = info.get(addr)
    if entries:
      return entries
  return [(None, None, None)]
194
195
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for several addresses of one library.

  Args:
    lib: pathname of the library (or executable) containing symbols
    unique_addrs: set of addresses, each a hexadecimal string

  Returns:
    None when lib is empty or when either the llvm-symbolizer or the objdump
    lookup produced nothing. Otherwise a dictionary
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with one non-empty list per requested address.

    For inlined code a list can hold several entries, the most deeply nested
    inlined location first. The source_location and
    object_symbol_with_offset of the last entry are usually the ones to
    display.
  """
  if not lib:
    return None

  line_info = CallLlvmSymbolizerForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  combined = {}
  for addr in unique_addrs:
    # Fall back to one empty frame so every address gets at least one entry.
    frames = line_info.get(addr) or [(None, None)]

    symbol_with_offset = None
    if addr in objdump_info:
      object_symbol, object_offset = objdump_info[addr]
      symbol_with_offset = FormatSymbolWithOffset(object_symbol, object_offset)

    combined[addr] = [(source_symbol, source_location, symbol_with_offset)
                      for (source_symbol, source_location) in frames]

  return combined
242
243
def CallLlvmSymbolizerForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Results are remembered per library in _SYMBOL_INFORMATION_ADDR2LINE_CACHE,
  so only addresses not seen before are sent to llvm-symbolizer.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None when lib is empty or no usable symbol file exists for it. Otherwise
    a dictionary of the form {addr: [(symbol, file:line)]} where each address
    has a list of associated symbols and locations, or an empty list if no
    symbol information was found. When talking to llvm-symbolizer fails for
    an address, its list is a single (None, error text) entry.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.setdefault(lib, {})

  # Split the request into cached answers and addresses still to symbolize.
  result = {}
  addrs = []
  for addr in sorted(unique_addrs):
    if addr in addr_cache:
      result[addr] = addr_cache[addr]
    else:
      addrs.append(addr)

  if not addrs:
    # Everything was cached (or nothing was asked for), we're done.
    return result

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("llvm-symbolizer"), "--functions", "--inlines",
      "--demangle", "--obj=" + symbols, "--output-style=JSON"]
  child = _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)

  for addr in addrs:
    try:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      json_result = json.loads(child.stdout.readline().strip())
      # A record without a "Symbol" list (for example an error report)
      # simply means there is no symbol information for this address.
      records = [
          # GNU style location: file_name:line_num
          (symbol["FunctionName"], "%s:%s" % (symbol["FileName"], symbol["Line"]))
          for symbol in json_result.get("Symbol", [])]
    except (IOError, ValueError) as e:
      # ValueError covers a reply that is not valid JSON, such as the empty
      # line read when the child has gone away.
      # Remove the / in front of the library name to match other output.
      records = [(None, lib[1:] + "  ***Error: " + str(e))]
    result[addr] = records
    addr_cache[addr] = records
  return result
316
317
def _ParseObjdumpOutput(lines, addrs):
  """Find the containing function of each address in objdump disassembly.

  Args:
    lines: iterable of llvm-objdump output lines.
    addrs: list of string hexadecimal addresses in ascending numeric order.

  Returns:
    A dictionary {addr: (string symbol, offset)} with an entry for every
    address in addrs that shows up as an instruction address in lines.
  """
  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  found = {}
  targets = [int(addr, 16) for addr in addrs]
  target_count = len(targets)
  if target_count == 0:
    return found

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  addr_index = 0  # Index of the address that we are currently looking for.

  for line in lines:
    # Is it a function line like:
    #   000177b0 <android::IBinder::~IBinder()>:
    components = func_regexp.match(line)
    if components:
      # This is a new function, so record the current function and its address.
      current_symbol_addr = int(components.group(1), 16)
      current_symbol = components.group(2)

      # Does it have an optional offset like: "foo(..)+0x2c"?
      components = offset_regexp.match(current_symbol)
      if components:
        current_symbol = components.group(1)
        offset = components.group(2)
        if offset:
          current_symbol_addr -= int(offset, 16)

    # Is it a disassembly line like:
    #   177b2:	b510      	push	{r4, lr}
    components = asm_regexp.match(line)
    if components:
      i_addr = int(components.group(1), 16)
      # Consume every wanted address up to this instruction. Addresses the
      # disassembly has already moved past are given up on, so that one
      # unmatched address cannot hide all the following ones.
      while addr_index < target_count and targets[addr_index] <= i_addr:
        if targets[addr_index] == i_addr:
          found[addrs[addr_index]] = (current_symbol, i_addr - current_symbol_addr)
        addr_index += 1
      if addr_index >= target_count:
        break

  return found


def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Results are remembered per library in _SYMBOL_INFORMATION_OBJDUMP_CACHE,
  so only addresses not seen before are looked up with llvm-objdump.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None when lib is empty or no symbol file exists for it. Otherwise a
    dictionary of the form {addr: (string symbol, offset)} holding the
    addresses that could be resolved.
  """
  if not lib:
    return None

  addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.setdefault(lib, {})

  # Split the request into cached answers and addresses still to look up.
  result = {}
  addrs = []
  for addr in unique_addrs:
    if addr in addr_cache:
      result[addr] = addr_cache[addr]
    else:
      addrs.append(addr)

  if addrs:
    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
      symbols = lib
      if not os.path.exists(symbols):
        return None

  if not addrs:
    # Everything was cached (or nothing was asked for), we're done.
    return result

  # Order numerically, not as strings: the disassembly range below and the
  # scan in _ParseObjdumpOutput() both rely on ascending addresses.
  addrs.sort(key=lambda addr: int(addr, 16))

  start_addr_dec = str(int(addrs[0], 16))
  stop_addr_dec = str(int(addrs[-1], 16) + 8)
  cmd = [ToolPath("llvm-objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # The context manager closes the pipe and waits for objdump to exit, also
  # when parsing stops before the end of its output.
  with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as objdump:
    found = _ParseObjdumpOutput(objdump.stdout, addrs)

  addr_cache.update(found)
  result.update(found)
  return result
420
421
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with llvm-cxxfilt.

  Answers are remembered in _SYMBOL_DEMANGLING_CACHE and the location of the
  llvm-cxxfilt binary in _CACHED_CXX_FILT. The tool is taken from the clang
  directory reported by FindClangDir(), or, when there is none, from a
  ./clang-r*/bin directory.

  Args:
    mangled_symbol: the symbol to demangle, as a string.

  Returns:
    The line llvm-cxxfilt printed for the symbol, stripped of whitespace.

  Raises:
    Exception: llvm-cxxfilt cannot be found, or more than one candidate
      exists.
  """
  global _CACHED_CXX_FILT

  try:
    return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]
  except KeyError:
    pass

  if not _CACHED_CXX_FILT:
    clang_dir = FindClangDir()
    if clang_dir:
      cxxfilt = clang_dir + "/bin/llvm-cxxfilt"
      if not os.path.exists(cxxfilt):
        raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
      candidates = [cxxfilt]
    else:
      # When run in CI, we don't have a way to find the clang version.  But
      # llvm-cxxfilt should be available in the following relative path.
      candidates = glob.glob("./clang-r*/bin/llvm-cxxfilt")
      if len(candidates) > 1:
        raise Exception("Expected one llvm-cxxfilt but found many: " + \
                        ", ".join(candidates))
    if not candidates:
      raise Exception("Could not find llvm-cxxfilt tool")
    # Exactly one candidate is left at this point.
    _CACHED_CXX_FILT = candidates[0]

  process = _PIPE_CPPFILT_CACHE.GetProcess([_CACHED_CXX_FILT])
  stdin = process.stdin
  stdin.write(mangled_symbol)
  stdin.write("\n")
  stdin.flush()

  demangled = process.stdout.readline().strip()
  _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled
  return demangled
457
458
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol as "symbol+offset" (decimal), or as is for offset 0."""
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
463
def FormatSymbolWithoutParameters(symbol):
  """Strip the parameter lists from a demangled function name.

  No attempt is made to parse the C++ signature: every top-level pair of
  matching parentheses is cut out. An empty pair "()" is left in place, and
  parentheses nested inside angle brackets are not top level. A symbol whose
  brackets do not pair up is returned unchanged, as is an empty symbol.
  """
  if not symbol:
    return symbol

  text = symbol.replace(") const", ")")  # Strip const keyword.
  # These operators contain '<' or '>' characters that have no partner;
  # swap them for look-alikes so they do not disturb the bracket matching.
  for operator_name, stand_in in (("operator<<", "operator\u00AB"),
                                  ("operator>>", "operator\u00BB"),
                                  ("operator->", "operator\u2192")):
    text = text.replace(operator_name, stand_in)

  opener_for = {')': '(', '>': '<'}
  pending = []           # Closing brackets still waiting for their opener.
  top_level_end = None   # One past the closer of the current top-level pair.

  # Walk backwards so that cutting never shifts the part still to be visited.
  position = len(text)
  while position > 0:
    position -= 1
    char = text[position]
    if char in opener_for:
      if not pending:
        top_level_end = position + 1
      pending.append(char)
    elif char == '(' or char == '<':
      if not pending or opener_for[pending.pop()] != char:
        return symbol  # Malformed: character does not match its pair.
      if not pending and char == '(' and (top_level_end - position) > 2:
        text = text[:position] + text[top_level_end:]

  if pending:
    return symbol  # Malformed: missing pair.

  return text.strip()
495
def SetBitness(lines):
  """Guess from trace lines whether the traced process is 32 or 64 bit.

  The result is stored in the module-level ARCH_IS_32BIT. The first regular
  backtrace line decides by the width of its address (8 or 16 hex digits).
  An ASAN style line decides immediately only when its address has more than
  8 hex digits; a shorter one is merely a hint for 32 bit. Without any
  matching line the result is False.

  Args:
    lines: iterable of trace lines (strings).
  """
  global ARCH_IS_32BIT

  # Raw strings: "\#" is not a valid escape sequence in an ordinary literal.
  trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
  asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

  ARCH_IS_32BIT = False
  for line in lines:
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      ARCH_IS_32BIT = len(trace_match.group(1)) != 16
      break
    asan_trace_match = asan_trace_line.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        ARCH_IS_32BIT = False
        # We know for a fact this is 64 bit, so we are done.
        break
      # This might be 32 bit, or just a small address. Keep going in this
      # case, but if we couldn't figure anything else out, go with 32 bit.
      ARCH_IS_32BIT = True
523
class FindClangDirTests(unittest.TestCase):
  # The lookup needs the scripts of a real checkout, so it is skipped when
  # ANDROID_BUILD_TOP has its "." fallback value.
  @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
  def test_clang_dir_found(self):
    clang_dir = FindClangDir()
    self.assertIsNotNone(clang_dir)
528
class SetBitnessTests(unittest.TestCase):
  # Each test feeds trace lines to SetBitness() and then inspects the
  # module-level ARCH_IS_32BIT that it updates.

  def test_32bit_check(self):
    trace = ["#00 pc 000374e0"]
    SetBitness(trace)
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_check(self):
    trace = ["#00 pc 00000000000374e0"]
    SetBitness(trace)
    self.assertFalse(ARCH_IS_32BIT)

  def test_32bit_asan_trace_line_toolchain(self):
    trace = ["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]
    SetBitness(trace)
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_asan_trace_line_toolchain(self):
    # A short address followed by one that can only be 64 bit.
    trace = [
        "#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
        "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
    ]
    SetBitness(trace)
    self.assertFalse(ARCH_IS_32BIT)
554
class FormatSymbolWithoutParametersTests(unittest.TestCase):
  # Every test lists (symbol, expected result) pairs.

  def _check(self, cases):
    for symbol, expected in cases:
      self.assertEqual(FormatSymbolWithoutParameters(symbol), expected)

  def test_c(self):
    self._check([
        ("foo", "foo"),
        ("foo+42", "foo+42"),
    ])

  def test_simple(self):
    self._check([
        ("foo(int i)", "foo"),
        ("foo(int i)+42", "foo+42"),
        ("bar::foo(int i)+42", "bar::foo+42"),
        ("operator()", "operator()"),
    ])

  def test_templates(self):
    self._check([
        ("bar::foo<T>(vector<T>& v)", "bar::foo<T>"),
        ("bar<T>::foo(vector<T>& v)", "bar<T>::foo"),
        ("bar::foo<T>(vector<T<U>>& v)", "bar::foo<T>"),
        ("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)", "bar::foo<(EnumType)0>"),
    ])

  def test_nested(self):
    self._check([
        ("foo(int i)::bar(int j)", "foo::bar"),
    ])

  def test_unbalanced(self):
    # Malformed symbols come back untouched.
    self._check([
        ("foo(bar(int i)", "foo(bar(int i)"),
        ("foo)bar(int i)", "foo)bar(int i)"),
        ("foo<bar(int i)", "foo<bar(int i)"),
        ("foo>bar(int i)", "foo>bar(int i)"),
    ])
581
# Running this file directly executes the unit tests defined above.
if __name__ == '__main__':
    unittest.main(verbosity=2)
584