1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import glob
23import os
24import platform
25import re
26import subprocess
27import unittest
28
# Root of the Android source tree. Fall back to the current directory when the
# build environment has not been set up, i.e. the variable is unset or empty.
# (Indexing os.environ directly would raise KeyError for the unset case, so
# the fallback could never trigger.)
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
32
def FindSymbolsDir():
  """Ask the build system where the unstripped binaries live.

  Runs make against build/core/config.mk from ANDROID_BUILD_TOP to dump the
  TARGET_OUT_UNSTRIPPED variable.

  Returns:
    ANDROID_BUILD_TOP joined with whatever make printed on stdout (stripped
    of surrounding whitespace).

  Raises:
    OSError: if the command cannot be started, e.g. because
      ANDROID_BUILD_TOP does not exist.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Run the command from the tree root via cwd= instead of chdir()ing this
  # whole process, and use communicate() so the pipe is closed and the child
  # is reaped rather than left behind as a zombie.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP)
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
44
# Directory prepended to library paths when looking for files with symbols.
# Computed once at import time, which runs make via FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture name (e.g. "arm", "arm64", "x86_64"). Assigned by
# SetAbi(); FindToolchain() and StripPC() read it.
ARCH = None


# These are private. Do not access them from other modules.
# FindToolchain() stores its last result here together with the ARCH value it
# was computed for, so a change of ARCH invalidates the cached toolchain.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None
53
54
def ToolPath(tool, toolchain=None):
  """Locate a toolchain binary by its unprefixed name.

  Args:
    tool: tool name without the target prefix, e.g. "addr2line".
    toolchain: directory to search; when empty or None the directory
      returned by FindToolchain() is used.

  Returns:
    The first path in the directory that matches "*-<tool>".

  Raises:
    IndexError: if nothing in the directory matches.
  """
  bin_dir = toolchain or FindToolchain()
  pattern = os.path.join(bin_dir, "*-" + tool)
  candidates = glob.glob(pattern)
  return candidates[0]
60
61
def FindToolchain():
  """Return the bin directory of the prebuilt GCC toolchain matching ARCH.

  The result is cached together with the ARCH it was computed for, so the
  filesystem is only searched again after ARCH changes. A one-line notice is
  printed each time a toolchain is (re)selected.

  Returns:
    Path of the toolchain's bin directory, as matched by glob.

  Raises:
    Exception: if no toolchain directory exists for ARCH, or the selected
      toolchain contains no usable addr2line binary.
  """
  global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
  if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
    return _CACHED_TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64. Note that these are the names of the top-level directory
  # rather than the _different_ names used lower down the directory hierarchy!
  gcc_dir = {
      "arm64": "aarch64",
      "mips64": "mips",
      "x86_64": "x86",
  }.get(ARCH, ARCH)

  os_name = platform.system().lower()

  available_toolchains = glob.glob(
      "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/" % (ANDROID_BUILD_TOP, os_name, gcc_dir))
  if not available_toolchains:
    raise Exception("Could not find tool chain for %s" % (ARCH))

  # Pick the lexicographically last candidate directory.
  toolchain = sorted(available_toolchains)[-1]

  # ToolPath() raises IndexError when nothing matches; translate that into the
  # descriptive error below instead of leaking a bare IndexError.
  try:
    addr2line = ToolPath("addr2line", toolchain)
  except IndexError:
    addr2line = None
  if not addr2line or not os.path.exists(addr2line):
    raise Exception("No addr2line for %s" % (toolchain))

  _CACHED_TOOLCHAIN = toolchain
  _CACHED_TOOLCHAIN_ARCH = ARCH
  # Parenthesized single-argument form prints identically under Python 2 and
  # is also valid Python 3 syntax.
  print("Using %s toolchain from: %s" % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
  return _CACHED_TOOLCHAIN
94
95
def SymbolInformation(lib, addr):
  """Resolve a single address to its symbol and source information.

  Thin wrapper around SymbolInformationForSet() for one address.

  Args:
    lib: pathname of the library (or executable) containing symbols.
    addr: the address as a hexadecimal string.

  Returns:
    A non-empty list of (source_symbol, source_location,
    object_symbol_with_offset) tuples. When a function was inlined the list
    may hold several entries, most deeply nested inlined location first, so
    callers usually display source_location and object_symbol_with_offset
    from the last entry. If nothing could be resolved the result is
    [(None, None, None)].
  """
  lookup = SymbolInformationForSet(lib, {addr})
  if lookup:
    records = lookup.get(addr)
    if records:
      return records
  return [(None, None, None)]
117
118
def SymbolInformationForSet(lib, unique_addrs):
  """Resolve a set of addresses in one library to symbols and locations.

  Combines the per-address results of CallAddr2LineForSet() (source symbol
  and file:line) with those of CallObjdumpForSet() (containing object symbol
  and offset).

  Args:
    lib: pathname of the library (or executable) containing symbols.
    unique_addrs: set of hexadecimal address strings.

  Returns:
    None if lib is empty or either lookup produced nothing. Otherwise a
    dictionary {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} with a non-empty list for every requested
    address; fields that could not be resolved are None.

    When a function was inlined the list may hold several entries, most
    deeply nested inlined location first. Callers usually display
    source_location and object_symbol_with_offset from the last entry.
  """
  if not lib:
    return None

  line_info = CallAddr2LineForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  merged = {}
  for address in unique_addrs:
    # The object-level symbol is shared by every inline frame of an address.
    if address in objdump_info:
      symbol, offset = objdump_info[address]
      object_label = FormatSymbolWithOffset(symbol, offset)
    else:
      object_label = None

    # Guarantee at least one frame even when addr2line had nothing to say.
    frames = line_info.get(address) or [(None, None)]
    merged[address] = [(src_symbol, src_location, object_label)
                       for src_symbol, src_location in frames]

  return merged
165
166
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Runs a single addr2line process for the library and feeds it the addresses
  one at a time over stdin.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings (without a "0x" prefix)
      to look up.

  Returns:
    None if lib is empty, cannot be found either under SYMBOLS_DIR or as
    given, or is a directory. Otherwise a dictionary of the form
    {addr: [(symbol, file:line)]} where each address has a list of
    associated symbols and locations, or an empty list if no symbol
    information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
      "--demangle", "--exe=" + symbols]
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      hit_eof = False
      while True:
        # addr2line prints two lines per frame: the symbol, then file:line.
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # An empty read means addr2line closed its output (it exited or
          # crashed). Without this check the loop would spin forever.
          hit_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0" or location == "??:?":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          child.stdin.flush()
          first = False
      result[addr] = records
      if hit_eof:
        # Nothing more can be read; keep what was resolved so far.
        break
  finally:
    # Always release the pipes and reap the child so no zombie is left behind.
    child.stdin.close()
    child.stdout.close()
    child.wait()
  return result
227
228
def StripPC(addr):
  """Clear the Thumb bit of a program counter address when ARCH is "arm".

  Args:
    addr: the program counter address, as an integer.

  Returns:
    addr with its lowest bit cleared if ARCH is "arm"; addr unchanged for
    every other architecture.
  """
  is_arm = ARCH == "arm"
  return (addr & ~1) if is_arm else addr
242
243
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Disassembles the .text section between the lowest and highest requested
  address and records, for every requested address that starts an
  instruction, the function it belongs to and its offset in that function.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings to find the functions
      for.

  Returns:
    None if lib is empty or cannot be found either under SYMBOLS_DIR or as
    given. Otherwise a dictionary of the form {addr: (string symbol, offset)}
    keyed by the original address strings; addresses with no instruction at
    that location are omitted. An empty unique_addrs yields an empty
    dictionary.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Pair every address string with its numeric (Thumb-stripped) value and sort
  # numerically. Sorting the strings themselves would misorder addresses of
  # different widths (e.g. "100" before "a0") and produce a start address
  # above the stop address.
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  if not targets:
    return {}

  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + str(targets[0][0]),
         "--stop-address=" + str(targets[-1][0] + 8),
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:	b510      	push	{r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index of the next target we are looking for.

  child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  result = {}
  try:
    for line in child.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it an disassembly line like:
      #   177b2:	b510      	push	{r4, lr}
      components = asm_regexp.match(line)
      if not components or not components.group(1).strip():
        continue
      i_addr = int(components.group(1), 16)

      # Consume every target at or below this instruction's address. Targets
      # equal to it are recorded (several strings may map to the same stripped
      # PC); targets strictly below it fell between instructions and are
      # skipped so they cannot block the ones that follow.
      while target_index < len(targets) and targets[target_index][0] <= i_addr:
        i_target, target_addr = targets[target_index]
        if i_target == i_addr:
          result[target_addr] = (current_symbol, i_target - current_symbol_addr)
        target_index += 1
      if target_index >= len(targets):
        break
  finally:
    # Close the pipe and reap objdump so no zombie process is left behind.
    child.stdout.close()
    child.wait()

  return result
324
325
def CallCppFilt(mangled_symbol):
  """Demangle a symbol name by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name.

  Returns:
    The first line c++filt printed, stripped of surrounding whitespace
    (an empty string if it printed nothing).
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # communicate() sends the input, closes stdin, drains stdout and waits for
  # the child, so the process is reaped instead of being left as a zombie.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.partition("\n")[0].strip()
335
336
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol and its offset as "symbol+offset".

  Args:
    symbol: the symbol name.
    offset: offset from the start of the symbol.

  Returns:
    symbol itself when offset is 0, otherwise "<symbol>+<offset>" with the
    offset formatted in decimal.
  """
  if offset != 0:
    return "%s+%d" % (symbol, offset)
  return symbol
341
342
def GetAbiFromToolchain(toolchain_var, bits):
  """Derive an ABI name from a toolchain path stored in the environment.

  Args:
    toolchain_var: name of the environment variable holding the toolchain
      path, e.g. "ANDROID_TOOLCHAIN".
    bits: bitness of the trace being symbolized; 64 selects the 64-bit
      variant of x86 and mips.

  Returns:
    None if the variable is unset/empty or its value contains none of the
    path components /aarch64/, /arm/, /mips/ or /x86/. Otherwise "arm64" for
    aarch64; "x86_64" or "mips64" for x86 or mips when bits is 64; and the
    matched component itself in every other case.
  """
  toolchain = os.environ.get(toolchain_var)
  if not toolchain:
    return None

  # Raw string: "/" needs no escaping, and the original "\/" was an invalid
  # string escape sequence.
  toolchain_match = re.search(r"/(aarch64|arm|mips|x86)/", toolchain)
  if not toolchain_match:
    return None

  abi = toolchain_match.group(1)
  if abi == "aarch64":
    return "arm64"
  if bits == 64:
    if abi == "x86":
      return "x86_64"
    if abi == "mips":
      return "mips64"
  return abi
360
def Get32BitArch():
  """Guess the architecture name for a 32-bit trace.

  Returns:
    The ABI derived from ANDROID_TOOLCHAIN_2ND_ARCH if that yields one,
    else the ABI derived from ANDROID_TOOLCHAIN, else "arm".
  """
  return (GetAbiFromToolchain("ANDROID_TOOLCHAIN_2ND_ARCH", 32) or
          GetAbiFromToolchain("ANDROID_TOOLCHAIN", 32) or
          "arm")
371
def Get64BitArch():
  """Guess the architecture name for a 64-bit trace.

  Returns:
    The ABI derived from ANDROID_TOOLCHAIN if that yields one, else "arm64".
  """
  return GetAbiFromToolchain("ANDROID_TOOLCHAIN", 64) or "arm64"
379
def SetAbi(lines):
  """Set the global ARCH from the contents of a crash report or stack trace.

  Lines are scanned in order. An explicit "ABI: '<name>'" line wins
  immediately. Otherwise the width of the first native backtrace address
  (8 or 16 hex digits) selects a 32- or 64-bit architecture. For ASAN-style
  frames ("#N 0x<addr> ..."), an address longer than 8 hex digits is
  conclusive for 64 bit, while a shorter one only sets a provisional 32-bit
  guess and scanning continues.

  Args:
    lines: iterable of text lines to inspect.

  Raises:
    Exception: if no line allowed the architecture to be determined; ARCH is
      left as None in that case.
  """
  global ARCH

  # Raw strings so that "#" and "\t" are handled by the regex engine rather
  # than relying on invalid or implicit string escape sequences.
  abi_line = re.compile(r"ABI: '(.*)'")
  trace_line = re.compile(r"#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
  asan_trace_line = re.compile(r"#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

  ARCH = None
  for line in lines:
    abi_match = abi_line.search(line)
    if abi_match:
      ARCH = abi_match.group(1)
      break
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      if len(trace_match.group(1)) == 16:
        ARCH = Get64BitArch()
      else:
        ARCH = Get32BitArch()
      break
    asan_trace_match = asan_trace_line.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        ARCH = Get64BitArch()
        # We know for a fact this is 64 bit, so we are done.
        break
      else:
        ARCH = Get32BitArch()
        # This might be 32 bit, or just a small address. Keep going in this
        # case, but if we couldn't figure anything else out, go with 32 bit.
  if not ARCH:
    raise Exception("Could not determine arch from input, use --arch=XXX to specify it")
414
415
class FindToolchainTests(unittest.TestCase):
  """Checks that FindToolchain() locates a toolchain for the listed ABIs."""

  def assert_toolchain_found(self, abi):
    """Point the module at `abi` and require FindToolchain() to succeed."""
    global ARCH
    ARCH = abi
    # FindToolchain raises when no toolchain is found, which fails the test.
    FindToolchain()

  def test_toolchains_found(self):
    for abi in ("arm", "arm64", "mips", "x86", "x86_64"):
      self.assert_toolchain_found(abi)
428
class SetArchTests(unittest.TestCase):
  """Tests for SetAbi()'s architecture detection.

  Most cases control the ANDROID_TOOLCHAIN* environment variables, so the
  environment is snapshotted before and restored after every test to keep
  the changes from leaking into the rest of the process.
  """

  TRACE_32 = "#00 pc 000374e0"
  TRACE_64 = "#00 pc 00000000000374e0"
  ASAN_32_A = "#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"
  ASAN_32_B = "#3 0xae1725b5  (/system/vendor/lib/libllvm-glnext.so+0x6435b5)"

  def setUp(self):
    self._saved_environ = dict(os.environ)

  def tearDown(self):
    os.environ.clear()
    os.environ.update(self._saved_environ)

  def _assert_arch(self, env, lines, expected):
    """Run SetAbi(lines) with exactly `env` as environment; check ARCH."""
    os.environ.clear()
    os.environ.update(env)
    SetAbi(lines)
    self.assertEqual(ARCH, expected)

  def test_abi_check(self):
    for abi in ("arm", "arm64", "mips", "mips64", "x86", "x86_64"):
      SetAbi(["ABI: '%s'" % abi])
      self.assertEqual(ARCH, abi)

  def test_32bit_trace_line_toolchain(self):
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/arm/arm-linux-androideabi-4.9/bin"},
        [self.TRACE_32], "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        [self.TRACE_32], "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        [self.TRACE_32], "x86")

  def test_32bit_trace_line_toolchain_2nd(self):
    # The 2nd-arch variable must take precedence over a 64-bit primary
    # toolchain. (This case used to set ANDROID_TOOLCHAIN_ARCH, a variable
    # nothing reads, so the precedence was never actually exercised.)
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/arm/arm-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        [self.TRACE_32], "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/mips/mips-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        [self.TRACE_32], "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/x86/x86-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        [self.TRACE_32], "x86")

  def test_64bit_trace_line_toolchain(self):
    # Use a real aarch64 path so arm64 comes from the toolchain; the former
    # "/aarch/" path matched nothing and only passed via the arm64 default.
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        [self.TRACE_64], "arm64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        [self.TRACE_64], "mips64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        [self.TRACE_64], "x86_64")

  def test_trace_default_abis(self):
    self._assert_arch({}, [self.TRACE_32], "arm")
    self._assert_arch({}, [self.TRACE_64], "arm64")

  def test_32bit_asan_trace_line_toolchain(self):
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/arm/arm-linux-androideabi-4.9/bin"},
        [self.ASAN_32_A], "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        [self.ASAN_32_A], "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        [self.ASAN_32_A], "x86")

  def test_32bit_asan_trace_line_toolchain_2nd(self):
    # See test_32bit_trace_line_toolchain_2nd for why ANDROID_TOOLCHAIN is
    # set to an aarch64 path in the first case.
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/arm/arm-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        [self.ASAN_32_B], "arm")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/mips/mips-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        [self.ASAN_32_B], "mips")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN_2ND_ARCH": "linux-x86/x86/x86-linux-androideabi-4.9/bin",
         "ANDROID_TOOLCHAIN": "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"},
        [self.ASAN_32_B], "x86")

  def test_64bit_asan_trace_line_toolchain(self):
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/aarch64/aarch64-linux-android-4.9/bin"},
        ["#0 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"],
        "arm64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/mips/arm-linux-androideabi-4.9/bin"},
        ["#1 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"],
        "mips64")
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        ["#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"],
        "x86_64")

    # Verify that if an address that might be 32 bit comes first, that
    # encountering a 64 bit address returns a 64 bit abi.
    self._assert_arch(
        {"ANDROID_TOOLCHAIN": "linux-x86/x86/arm-linux-androideabi-4.9/bin"},
        ["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
         "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"],
        "x86_64")

  def test_asan_trace_default_abis(self):
    self._assert_arch(
        {}, ["#4 0x1234349ab  (/system/vendor/lib/libllvm-glnext.so+0x64fc4f)"],
        "arm64")
    self._assert_arch(
        {}, ["#1 0xae17ec4f  (/system/vendor/lib/libllvm-glnext.so+0x64fc4f)"],
        "arm")

  def test_no_abi(self):
    # assertRaisesRegexp is the Python 2 spelling; prefer the newer name where
    # it exists because the old alias is gone from recent Python 3 releases.
    assert_raises_regex = (getattr(self, "assertRaisesRegex", None) or
                           self.assertRaisesRegexp)
    assert_raises_regex(Exception, "Could not determine arch from input, use --arch=XXX to specify it", SetAbi, [])
593
# Run this module's unit tests when it is executed as a script.
if __name__ == "__main__":
  unittest.main()
596