1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import glob
23import os
24import platform
25import re
26import subprocess
27import unittest
28
# Root of the Android source tree.  Fall back to the current directory when
# the build environment has not been set up, i.e. the variable is either
# unset (a plain os.environ[...] lookup would raise KeyError) or empty.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
32
def FindSymbolsDir():
  """Ask the build system where the unstripped symbols live.

  Runs make against build/core/config.mk with the
  dumpvar-abs-TARGET_OUT_UNSTRIPPED target from inside ANDROID_BUILD_TOP.

  Returns:
    Whatever make printed on stdout (whitespace-stripped), joined onto
    ANDROID_BUILD_TOP.  If make printed nothing, this is ANDROID_BUILD_TOP
    with a trailing separator.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Run the child in the build top via cwd= instead of chdir()-ing this
  # process back and forth, and read its output as text.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP, universal_newlines=True)
  # communicate() drains the pipe, closes it and reaps the child.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
44
# Directory holding the unstripped binaries; resolved once, at import time.
SYMBOLS_DIR = FindSymbolsDir()

# Architecture currently being symbolized.  Assigned by SetAbi() and read by
# FindToolchain() and StripPC().
ARCH = None


# Private memo for FindToolchain(): the toolchain that was found and the ARCH
# it was found for.  Do not access these from other modules.
_CACHED_TOOLCHAIN = _CACHED_TOOLCHAIN_ARCH = None
53
54
def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: unprefixed tool name, e.g. "addr2line" or "objdump".
    toolchain: directory containing the prefixed tool binaries.  When omitted
      or empty, the directory returned by FindToolchain() is used.

  Returns:
    The path of an entry in the toolchain directory named "*-<tool>".

  Raises:
    IndexError: the toolchain directory has no matching tool.  The exception
      type is the one the bare list lookup used to produce; only the message
      is new.
  """
  if not toolchain:
    toolchain = FindToolchain()
  matches = glob.glob(os.path.join(toolchain, "*-" + tool))
  if not matches:
    raise IndexError("Could not find %s in toolchain %s" % (tool, toolchain))
  return matches[0]
60
61
def FindToolchain():
  """Returns the toolchain matching ARCH.

  Looks under ANDROID_BUILD_TOP/prebuilts/gcc/<host os>-x86/<gcc dir>/ for
  "*-linux-*/bin/" directories, picks the one that sorts last and checks that
  it contains an addr2line binary.  The answer is cached per ARCH.

  Returns:
    The toolchain's bin directory.

  Raises:
    Exception: no toolchain directory exists for ARCH, or the chosen one has
      no addr2line.
  """
  global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
  if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
    return _CACHED_TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64. Note that these are the names of the top-level directory
  # rather than the _different_ names used lower down the directory hierarchy!
  gcc_dir = {"arm64": "aarch64", "mips64": "mips", "x86_64": "x86"}.get(ARCH,
                                                                        ARCH)

  os_name = platform.system().lower()

  available_toolchains = glob.glob(
      "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/"
      % (ANDROID_BUILD_TOP, os_name, gcc_dir))
  if not available_toolchains:
    raise Exception("Could not find tool chain for %s" % (ARCH))

  toolchain = sorted(available_toolchains)[-1]

  # Glob directly instead of going through ToolPath(): indexing an empty
  # match list there would raise before this check could ever report it.
  if not glob.glob(os.path.join(toolchain, "*-addr2line")):
    raise Exception("No addr2line for %s" % (toolchain))

  _CACHED_TOOLCHAIN = toolchain
  _CACHED_TOOLCHAIN_ARCH = ARCH
  # A single parenthesized argument prints identically on Python 2 and 3.
  print("Using %s toolchain from: %s"
        % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
  return _CACHED_TOOLCHAIN
94
95
def SymbolInformation(lib, addr):
  """Resolve a single address to its symbol and source information.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.

    When the function was inlined the list can hold several entries, the
    most deeply nested inlined location first.  The list is never empty:
    if nothing could be resolved it is [(None, None, None)].

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  per_addr = SymbolInformationForSet(lib, set((addr,)))
  if per_addr:
    records = per_addr.get(addr)
    if records:
      return records
  return [(None, None, None)]
117
118
def SymbolInformationForSet(lib, unique_addrs):
  """Resolve a set of addresses from one library to symbols and locations.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    None when lib is empty or when either addr2line or objdump yields
    nothing.  Otherwise a dictionary
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with a non-empty list for every address.

    When the function was inlined a list can hold several entries, the most
    deeply nested inlined location first.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  line_info = CallAddr2LineForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  symbols_by_addr = {}
  for addr in unique_addrs:
    # addr2line may have nothing for this address; keep one placeholder entry
    # so the per-address list is never empty.
    sources = line_info.get(addr) or [(None, None)]

    object_symbol_with_offset = None
    if addr in objdump_info:
      object_symbol, object_offset = objdump_info[addr]
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)

    records = []
    for source_symbol, source_location in sources:
      records.append(
          (source_symbol, source_location, object_symbol_with_offset))
    symbols_by_addr[addr] = records

  return symbols_by_addr
165
166
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or cannot be found (either below SYMBOLS_DIR or as
    given).  Otherwise a dictionary of the form {addr: [(symbol, file:line)]}
    where each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes so that str can be written and read on Python 2 and 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  addrs = sorted(unique_addrs)
  # Pre-seed every address so the result is complete even if we stop early.
  result = {addr: [] for addr in addrs}
  try:
    for addr in addrs:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = result[addr]
      sentinel_sent = False
      child_exited = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # EOF: addr2line went away.  Without this check the loop would
          # append ("", "") records forever.
          child_exited = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location in ("??:0", "??:?"):
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if not sentinel_sent:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush so the sentinel reaches the child even on a buffered pipe.
          child.stdin.flush()
          sentinel_sent = True
      if child_exited:
        break
  finally:
    for pipe in (child.stdin, child.stdout):
      try:
        pipe.close()
      except (IOError, OSError):
        pass  # The child is already gone; there is nothing left to flush.
    # Reap the child so it does not linger as a zombie.
    child.wait()
  return result
223
224
def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address, as an integer

  Returns:
    addr with its lowest bit cleared when ARCH is "arm"; addr unchanged for
    every other ARCH.
  """
  return addr & ~1 if ARCH == "arm" else addr
238
239
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None if lib is empty or cannot be found (either below SYMBOLS_DIR or as
    given).  Otherwise a dictionary of the form {addr: (string symbol, offset)}
    holding every address that coincides with a disassembled instruction.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  if not unique_addrs:
    # Nothing to look up; there is no address range to hand to objdump.
    return {}

  # Pair every address string with its stripped numeric value and order by
  # that value.  Sorting the strings themselves puts "1000" before "ff",
  # which yields an inverted address range and breaks the in-order walk
  # below.
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  start_addr_dec = str(targets[0][0])
  stop_addr_dec = str(targets[-1][0] + 8)
  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index into targets of the address we are looking for.
  target_count = len(targets)

  # Text-mode pipe so the str regexes apply on Python 2 and 3 alike.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                           universal_newlines=True)
  result = {}
  try:
    for line in child.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if components:
        i_addr = int(components.group(1), 16)
        # Give up on targets the disassembly has already moved past (they do
        # not sit on an instruction boundary) so they cannot block the rest.
        while target_index < target_count and targets[target_index][0] < i_addr:
          target_index += 1
        # Several address strings can strip to the same value; record each.
        while (target_index < target_count
               and targets[target_index][0] == i_addr):
          result[targets[target_index][1]] = (current_symbol,
                                              i_addr - current_symbol_addr)
          target_index += 1
        if target_index >= target_count:
          break
  finally:
    # Close before waiting: if we stopped reading early, a child blocked on a
    # full pipe would otherwise never exit.
    child.stdout.close()
    child.wait()

  return result
320
321
def CallCppFilt(mangled_symbol):
  """Demangle a symbol name with the toolchain's c++filt.

  Args:
    mangled_symbol: the symbol name to pass to c++filt, as a string

  Returns:
    The first line c++filt printed, stripped of surrounding whitespace.
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=True)
  # communicate() feeds the input, closes both pipes and reaps the child, so
  # no c++filt process is left behind.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.split("\n", 1)[0].strip()
331
332
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol and an offset into it as "symbol+offset".

  Args:
    symbol: the symbol name
    offset: integer offset from the start of the symbol

  Returns:
    symbol itself when offset is 0, otherwise "<symbol>+<offset>" with the
    offset in decimal.
  """
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
337
338
def GetAbiFromToolchain(toolchain_var, bits):
  """Derive an ABI name from a toolchain path held in an environment variable.

  Args:
    toolchain_var: name of the environment variable holding a toolchain path
    bits: bitness of the addresses being symbolized; 64 selects the 64-bit
      ABI name for the x86 and mips toolchains

  Returns:
    "arm64" for an aarch64 toolchain; "x86_64" or "mips64" for an x86 or mips
    toolchain when bits is 64; otherwise the matched directory name ("arm",
    "mips" or "x86").  None when the variable is unset or empty, or when its
    value has no /aarch64/, /arm/, /mips/ or /x86/ path component.
  """
  toolchain = os.environ.get(toolchain_var)
  if not toolchain:
    return None

  # "/" needs no escaping in a regular expression; the old "\/" spelling was
  # an invalid string escape that newer Pythons warn about.
  toolchain_match = re.search(r"/(aarch64|arm|mips|x86)/", toolchain)
  if not toolchain_match:
    return None

  abi = toolchain_match.group(1)
  if abi == "aarch64":
    return "arm64"
  if bits == 64:
    # x86 and mips use one toolchain directory for both bitnesses.
    return {"x86": "x86_64", "mips": "mips64"}.get(abi, abi)
  return abi
356
357
def SetAbi(lines):
  """Work out the architecture of a crash dump and store it in ARCH.

  The first line that is either an "ABI: '<abi>'" line or a backtrace frame
  decides.  An ABI line names the architecture outright.  A backtrace frame
  only reveals the bitness (8 or 16 hex digits of address), so the
  architecture is then guessed from the ANDROID_TOOLCHAIN* environment
  variables, defaulting to arm64 for 64-bit and arm for 32-bit.

  Args:
    lines: iterable of lines from the crash dump

  Raises:
    Exception: no line identified an architecture.  ARCH is left as None.
  """
  global ARCH

  # Raw strings: "#" needs no escaping, and the old "\#" spelling was an
  # invalid string escape that newer Pythons warn about.
  abi_line = re.compile(r"ABI: '(.*)'")
  trace_line = re.compile(
      r"#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")

  ARCH = None
  for line in lines:
    abi_match = abi_line.search(line)
    if abi_match:
      ARCH = abi_match.group(1)
      break
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      if len(trace_match.group(1)) == 16:
        # 64 bit
        # Check for ANDROID_TOOLCHAIN, if it is set, we can figure out the
        # arch this way. If this is not set, then default to arm64.
        ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 64) or "arm64"
      else:
        # 32 bit
        # Check for ANDROID_TOOLCHAIN_2ND_ARCH first, if set, use that.
        # If not try ANDROID_TOOLCHAIN to find the arch.
        # If this is not set, then default to arm.
        ARCH = (GetAbiFromToolchain("ANDROID_TOOLCHAIN_2ND_ARCH", 32)
                or GetAbiFromToolchain("ANDROID_TOOLCHAIN", 32)
                or "arm")
      break
  if not ARCH:
    raise Exception("Could not determine arch from input")
393
394
class FindToolchainTests(unittest.TestCase):
  """Checks that FindToolchain() succeeds for each supported ABI."""

  def assert_toolchain_found(self, abi):
    """Point ARCH at abi and require FindToolchain() not to raise."""
    global ARCH
    ARCH = abi
    FindToolchain()  # Raises when no usable toolchain is found.

  def test_toolchains_found(self):
    for abi in ("arm", "arm64", "mips", "x86", "x86_64"):
      self.assert_toolchain_found(abi)
407
class SetArchTests(unittest.TestCase):
  """Tests for SetAbi(): explicit ABI lines and toolchain-based guessing."""

  def setUp(self):
    # These tests rewrite os.environ wholesale; remember it so tearDown can
    # put it back and later code does not run with an empty environment.
    self._saved_environ = dict(os.environ)

  def tearDown(self):
    os.environ.clear()
    os.environ.update(self._saved_environ)

  def _assert_arch(self, environ, line, expected_arch):
    """Run SetAbi on one line with exactly `environ` set; check ARCH."""
    os.environ.clear()
    os.environ.update(environ)
    SetAbi([line])
    self.assertEqual(ARCH, expected_arch)

  def test_abi_check(self):
    for abi in ("arm", "arm64", "mips", "mips64", "x86", "x86_64"):
      SetAbi(["ABI: '%s'" % abi])
      self.assertEqual(ARCH, abi)

  def test_32bit_trace_line_toolchain(self):
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/arm/arm-linux-androideabi-4.9/bin"},
        "#00 pc 000374e0", "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        "#00 pc 000374e0", "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        "#00 pc 000374e0", "x86")

  def test_32bit_trace_line_toolchain_2nd(self):
    # ANDROID_TOOLCHAIN_2ND_ARCH must win over ANDROID_TOOLCHAIN.  The first
    # case used to set the unrelated name ANDROID_TOOLCHAIN_ARCH, so it never
    # exercised that precedence.
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH":
             "linux-x86/arm/arm-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN":
             "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        "#00 pc 000374e0", "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH":
             "linux-x86/mips/mips-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN":
             "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        "#00 pc 000374e0", "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH":
             "linux-x86/x86/x86-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN":
             "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        "#00 pc 000374e0", "x86")

  def test_64bit_trace_line_toolchain(self):
    # The path must contain /aarch64/ to be recognised; the former "aarch"
    # spelling only passed because arm64 is also the 64-bit default.
    self._assert_arch(
        {"ANDROID_TOOLCHAIN":
             "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        "#00 pc 00000000000374e0", "arm64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        "#00 pc 00000000000374e0", "mips64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        "#00 pc 00000000000374e0", "x86_64")

  def test_default_abis(self):
    self._assert_arch({}, "#00 pc 000374e0", "arm")
    self._assert_arch({}, "#00 pc 00000000000374e0", "arm64")

  def test_no_abi(self):
    # assertRaisesRegexp is the Python 2.7 name; newer Pythons only provide
    # assertRaisesRegex.
    assert_raises_regex = getattr(self, "assertRaisesRegex", None)
    if assert_raises_regex is None:
      assert_raises_regex = self.assertRaisesRegexp
    assert_raises_regex(Exception, "Could not determine arch from input",
                        SetAbi, [])
497
if __name__ == "__main__":
  # Executed as a script rather than imported: run the unit tests above.
  unittest.main()
500