1#!/usr/bin/env python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
19import os
20import re
21import subprocess
22import symbol
23import tempfile
24import unittest
25
26import example_crashes
27
def ConvertTrace(lines):
  """Symbolize a crash dump and print the result.

  Builds a fresh TraceConverter, reports which symbols directory is in use
  (symbol.SYMBOLS_DIR) and then hands `lines` to TraceConverter.ConvertTrace.

  Args:
    lines: the lines of the crash dump to symbolize.
  """
  converter = TraceConverter()
  # One pre-formatted string prints the same text as the two-item print
  # statement did ("Reading symbols from <dir>").
  print("Reading symbols from %s" % (symbol.SYMBOLS_DIR,))
  converter.ConvertTrace(lines)
32
class TraceConverter:
  """Symbolizes the native stack information found in crash dump text.

  Lines are fed one at a time to ProcessLine. Header lines (process info,
  signal, abort message, registers, thread separators, revision) are echoed
  as they are seen; back trace lines and stack data lines are symbolized
  through the `symbol` module, accumulated in `trace_lines` / `value_lines`
  and printed as tables by PrintOutput.

  The ABI dependent patterns are placeholders until UpdateAbiRegexes has been
  called, so UpdateAbiRegexes must run before lines are processed.
  """

  # Header patterns; group(1) of a match is what gets echoed.
  process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
  revision_line = re.compile(r"(Revision: '(.*)')")
  signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
  abort_message_line = re.compile(r"(Abort message: '.*')")
  thread_line = re.compile(r"(.*)(\-\-\- ){15}\-\-\-")
  dalvik_jni_thread_line = re.compile(r'(".*" prio=[0-9]+ tid=[0-9]+ NATIVE.*)')
  dalvik_native_thread_line = re.compile(r'(".*" sysTid=[0-9]+ nice=[0-9]+.*)')
  # "$a" can never match (nothing can follow the end of the string). These
  # placeholders are replaced by UpdateAbiRegexes once the ABI is known.
  register_line = re.compile("$a")
  trace_line = re.compile("$a")
  sanitizer_trace_line = re.compile("$a")
  value_line = re.compile("$a")
  code_line = re.compile("$a")
  # Patterns used to parse the output of `zipinfo -v`.
  zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
  zipinfo_central_info_match = re.compile(
      r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
      r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
  # Class-level defaults, kept for backward compatibility. __init__ gives
  # every instance its own containers so that state is not shared.
  trace_lines = []
  value_lines = []
  last_frame = -1
  width = "{8}"
  spacing = ""
  apk_info = dict()

  register_names = {
    "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
    "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
    "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
    "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
    "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
    "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
  }

  # Architectures whose addresses are printed as 16 hex digits.
  _WIDE_ARCHS = ("arm64", "mips64", "x86_64")

  def __init__(self):
    # Per-instance state. Without this, appending to trace_lines/value_lines
    # or storing into apk_info would mutate the class-level objects that all
    # instances share.
    self.trace_lines = []
    self.value_lines = []
    self.last_frame = -1
    self.apk_info = dict()

  def UpdateAbiRegexes(self):
    """Rebuild the ABI dependent patterns for the current symbol.ARCH.

    Raises:
      KeyError: if symbol.ARCH has no entry in register_names.
    """
    if symbol.ARCH in self._WIDE_ARCHS:
      self.width = "{16}"
      self.spacing = "        "
    else:
      self.width = "{8}"
      self.spacing = ""

    self.register_line = re.compile(r"(([ ]*\b(" + self.register_names[symbol.ARCH] + r")\b +[0-9a-f]" + self.width + r"){2,5})")

    # Note that both trace and value line matching allow for variable amounts of
    # whitespace (e.g. \t). This is because we want to allow for the stack
    # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
    # strips out double spaces that are found in tombstone files and logcat output.
    #
    # Examples of matched trace lines include lines from tombstone files like:
    #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so
    #
    # Or lines from AndroidFeedback crash report system logs like:
    #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
    # Please note the spacing differences.
    self.trace_line = re.compile(
        r".*"                                                # Random start stuff.
        r"\#(?P<frame>[0-9]+)"                               # Frame number.
        r"[ \t]+..[ \t]+"                                    # (space)pc(space).
        r"(?P<offset>[0-9a-f]" + self.width + r")[ \t]+"     # Offset (hex number given without
                                                             #         0x prefix).
        r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"                   # Library name.
        r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"      # Offset into the file to find the start of the shared so.
        r"(?P<symbolpresent> \((?P<symbol>.*)\))?")          # Is the symbol there?
                                                             # pylint: disable-msg=C6310
    # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
    # its own regex. Example:
    # 08-19 05:29:26.283   397   403 I         :     #0 0xb6a15237  (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
    self.sanitizer_trace_line = re.compile(
        r".*"                                                # Random start stuff.
        r"\#(?P<frame>[0-9]+)"                               # Frame number.
        r"[ \t]+0x[0-9a-f]+[ \t]+"                           # PC, not interesting to us.
        r"\("                                                # Opening paren.
        r"(?P<dso>[^+]+)"                                    # Library name.
        r"\+"                                                # '+'
        r"0x(?P<offset>[0-9a-f]+)"                           # Offset (hex number given with
                                                             #         0x prefix).
        r"\)")                                               # Closing paren.
                                                             # pylint: disable-msg=C6310
    # Examples of matched value lines include:
    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so
    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so (symbol)
    #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
    # Again, note the spacing differences.
    self.value_line = re.compile(r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
    # Lines from 'code around' sections of the output will be matched before
    # value lines because otherwise the 'code around' sections will be confused as
    # value lines.
    #
    # Examples include:
    #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    self.code_line = re.compile(r"(.*)[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[a-f0-9]" + self.width +
                                r"[ \t]*[ \r\n]")  # pylint: disable-msg=C6310

  def CleanLine(self, ln):
    """Return `ln` as text with every non-ASCII character removed.

    AndroidFeedback adds zero width spaces into its crash reports. These
    should be removed or the regular expressions will fail to match.

    Args:
      ln: a line of the dump, either a byte string or a text string.

    Returns:
      The line as a text (unicode) string containing ASCII characters only.
    """
    if isinstance(ln, bytes):
      return ln.decode("ascii", "ignore")
    # Already text: round-trip through ASCII so the same characters are
    # dropped as for byte input.
    return ln.encode("ascii", "ignore").decode("ascii")

  def PrintTraceLines(self, trace_lines):
    """Print back trace.

    Args:
      trace_lines: sequence of (addr, symbol_with_offset, location) tuples.
          Nothing is printed when it is empty.
    """
    if not trace_lines:
      return
    maxlen = max(len(tl[1]) for tl in trace_lines)
    print("")
    print("Stack Trace:")
    print("  RELADDR   " + self.spacing + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
    for (addr, symbol_with_offset, location) in trace_lines:
      print("  %8s  %s  %s" % (addr, symbol_with_offset.ljust(maxlen), location))

  def PrintValueLines(self, value_lines):
    """Print stack data values.

    Args:
      value_lines: sequence of (addr, value, symbol_with_offset, location)
          tuples. Nothing is printed when it is empty.
    """
    if not value_lines:
      return
    maxlen = max(len(vl[2]) for vl in value_lines)
    print("")
    print("Stack Data:")
    print("  ADDR      " + self.spacing + "VALUE     " + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
    for (addr, value, symbol_with_offset, location) in value_lines:
      print("  %8s  %8s  %s  %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))

  def PrintOutput(self, trace_lines, value_lines):
    """Print the back trace table and then the stack data table.

    Args:
      trace_lines: entries for PrintTraceLines; skipped when empty.
      value_lines: entries for PrintValueLines; skipped when empty.
    """
    if trace_lines:
      self.PrintTraceLines(trace_lines)
    if value_lines:
      self.PrintValueLines(value_lines)

  def PrintDivider(self):
    """Print the separator shown between two consecutive dumps."""
    print("")
    print("-----------------------------------------------------\n")

  def _FlushOutput(self):
    """Print what has been accumulated, then a divider, and start afresh."""
    self.PrintOutput(self.trace_lines, self.value_lines)
    self.PrintDivider()
    self.trace_lines = []
    self.value_lines = []

  def DeleteApkTmpFiles(self):
    """Delete every library that was extracted from an apk.

    Each path is forgotten as it is removed, so calling this again (or looking
    the library up again) never touches an already deleted file.
    """
    for _, _, tmp_files in self.apk_info.values():
      while tmp_files:
        _, tmp_file = tmp_files.popitem()
        os.unlink(tmp_file)

  def ConvertTrace(self, lines):
    """Symbolize `lines` and print the result.

    Sets the ABI through symbol.SetAbi when symbol.ARCH is not set yet.

    Args:
      lines: iterable of raw dump lines; each goes through CleanLine first.
    """
    # A real list is needed: the lines are walked more than once.
    lines = [self.CleanLine(ln) for ln in lines]
    try:
      if not symbol.ARCH:
        symbol.SetAbi(lines)
      self.UpdateAbiRegexes()
      for line in lines:
        self.ProcessLine(line)
      self.PrintOutput(self.trace_lines, self.value_lines)
    finally:
      # Delete any temporary files created while processing the lines.
      self.DeleteApkTmpFiles()

  def MatchTraceLine(self, line):
    """Parse a back trace line in debuggerd or sanitizer format.

    Args:
      line: the line to inspect.

    Returns:
      None when the line is not a back trace line, otherwise a dict with the
      keys "frame", "offset", "so_offset", "dso", "symbol_present" and
      "symbol_name". Sanitizer lines carry no symbol and no so_offset.
    """
    match = self.trace_line.match(line)
    if match:
      return {"frame": match.group("frame"),
              "offset": match.group("offset"),
              "so_offset": match.group("so_offset"),
              "dso": match.group("dso"),
              "symbol_present": bool(match.group("symbolpresent")),
              "symbol_name": match.group("symbol")}
    match = self.sanitizer_trace_line.match(line)
    if match:
      return {"frame": match.group("frame"),
              "offset": match.group("offset"),
              "so_offset": None,
              "dso": match.group("dso"),
              "symbol_present": False,
              "symbol_name": None}
    return None

  def ExtractLibFromApk(self, apk, shared_lib_name):
    """Extract one entry of an apk into a temporary file using `unzip -p`.

    Args:
      apk: path of the apk on the host.
      shared_lib_name: name of the entry inside the apk.

    Returns:
      The path of the temporary file, or None when unzip exits with a
      non-zero status. The caller owns the returned file.
    """
    tmp_fd, tmp_file = tempfile.mkstemp()
    extracted = False
    try:
      extracted = subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0
    finally:
      # The descriptor is always closed; the file only survives a success.
      os.close(tmp_fd)
      if not extracted:
        os.unlink(tmp_file)
    return tmp_file if extracted else None

  def ProcessCentralInfo(self, offset_list, central_info):
    """Parse one central directory entry printed by `zipinfo -v`.

    Args:
      offset_list: list that receives a [name, start, end] item.
      central_info: text of a single central directory entry.

    Returns:
      (name, start, end), where start is the offset of the local header and
      end is start plus the compressed size.

    Raises:
      Exception: if the expected fields cannot be found in central_info.
    """
    match = self.zipinfo_central_info_match.search(central_info)
    if not match:
      raise Exception("Cannot find all info from zipinfo\n" + central_info)
    name = match.group(1)
    start = int(match.group(2))
    end = start + int(match.group(3))

    offset_list.append([name, start, end])
    return name, start, end

  def _ReadApkOffsets(self, apk_full_path):
    """Return the [name, start, end] items of every entry in the apk."""
    offset_list = []
    cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path],
                           stdout=subprocess.PIPE, universal_newlines=True)
    try:
      # Find the first central info marker.
      for line in cmd.stdout:
        if self.zipinfo_central_directory_line.search(line):
          break

      # Everything between two markers describes one entry.
      entry_lines = []
      for line in cmd.stdout:
        if self.zipinfo_central_directory_line.search(line):
          self.ProcessCentralInfo(offset_list, "".join(entry_lines))
          entry_lines = []
        else:
          entry_lines.append(line)
      if entry_lines:
        self.ProcessCentralInfo(offset_list, "".join(entry_lines))
    finally:
      # Release the pipe and reap the child even when parsing fails.
      cmd.stdout.close()
      cmd.wait()
    return offset_list

  def _ExtractLibAtOffset(self, apk_full_path, offset_list, tmp_files, offset):
    """Return (name, path) of the apk entry containing `offset`, extracting it
    on first use; (None, None) when there is no such entry or unzip fails."""
    for file_name, start, end in offset_list:
      if start <= offset < end:
        if file_name in tmp_files:
          return file_name, tmp_files[file_name]
        tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
        if tmp_file:
          tmp_files[file_name] = tmp_file
          return file_name, tmp_file
        # Only the first entry containing the offset is considered.
        break
    return None, None

  def GetLibFromApk(self, apk, offset):
    """Find and extract the shared library located at `offset` inside an apk.

    The apk is looked up below the directory named by the ANDROID_PRODUCT_OUT
    environment variable. The entry table of every apk is read once and kept
    in apk_info together with the files extracted so far.

    Args:
      apk: path of the apk as it appears in the back trace.
      offset: offset into the apk, as a hexadecimal string.

    Returns:
      (name of the entry, path of the extracted temporary file), or
      (None, None) when the library cannot be located or extracted.
    """
    # Convert the hex string to a number.
    offset = int(offset, 16)

    # Check if we already have information about this apk.
    if apk in self.apk_info:
      apk_full_path, offset_list, tmp_files = self.apk_info[apk]
      return self._ExtractLibAtOffset(apk_full_path, offset_list, tmp_files, offset)

    if "ANDROID_PRODUCT_OUT" not in os.environ:
      print("ANDROID_PRODUCT_OUT environment variable not set.")
      return None, None
    out_dir = os.environ["ANDROID_PRODUCT_OUT"]
    if not os.path.exists(out_dir):
      print("ANDROID_PRODUCT_OUT " + out_dir + " does not exist.")
      return None, None
    if apk.startswith("/"):
      # os.path.join would discard out_dir for an absolute apk path.
      apk_full_path = out_dir + apk
    else:
      apk_full_path = os.path.join(out_dir, apk)
    if not os.path.exists(apk_full_path):
      print("Cannot find apk " + apk)
      return None, None

    offset_list = self._ReadApkOffsets(apk_full_path)

    # Save the information from the zip.
    tmp_files = dict()
    self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
    return self._ExtractLibAtOffset(apk_full_path, offset_list, tmp_files, offset)

  def ProcessLine(self, line):
    """Process one line of the dump.

    Header lines are printed immediately, after flushing whatever was
    accumulated for the previous dump. Back trace and stack data lines are
    symbolized and appended to trace_lines / value_lines.

    Args:
      line: a line of the dump, already cleaned.

    Returns:
      True when the line was recognized as a header, back trace or stack data
      line, False otherwise.
    """
    ret = False
    # Listed in the order in which simultaneous matches are echoed.
    header_patterns = (self.process_info_line,
                       self.signal_line,
                       self.abort_message_line,
                       self.register_line,
                       self.thread_line,
                       self.dalvik_jni_thread_line,
                       self.dalvik_native_thread_line,
                       self.revision_line)
    headers = [m for m in (p.search(line) for p in header_patterns) if m]
    if headers:
      if self.trace_lines or self.value_lines:
        self._FlushOutput()
        self.last_frame = -1
      for header in headers:
        print(header.group(1))
      return True

    trace_line_dict = self.MatchTraceLine(line)
    if trace_line_dict is not None:
      ret = True
      # Compare frame numbers as integers; as strings "10" sorts before "9".
      frame = int(trace_line_dict["frame"])
      code_addr = trace_line_dict["offset"]
      area = trace_line_dict["dso"]
      so_offset = trace_line_dict["so_offset"]
      symbol_present = trace_line_dict["symbol_present"]
      symbol_name = trace_line_dict["symbol_name"]

      # A frame number that does not increase means a new back trace started.
      if frame <= self.last_frame and (self.trace_lines or self.value_lines):
        self._FlushOutput()
      self.last_frame = frame

      if area == "<unknown>" or area == "[heap]" or area == "[stack]":
        self.trace_lines.append((code_addr, "", area))
      else:
        # If this is an apk, it usually means that there is actually
        # a shared so that was loaded directly out of it. In that case,
        # extract the shared library and the name of the shared library.
        lib = None
        if area.endswith(".apk") and so_offset:
          lib_name, lib = self.GetLibFromApk(area, so_offset)
        if not lib:
          lib = area
          lib_name = None

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c"
        info = symbol.SymbolInformation(lib, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, object_symbol_with_offset) in info:
          if not source_symbol:
            if symbol_present:
              source_symbol = symbol.CallCppFilt(symbol_name)
            else:
              source_symbol = "<unknown>"
          if not source_location:
            source_location = area
            if lib_name:
              source_location += "(" + lib_name + ")"
          if nest_count > 0:
            # Every entry but the last is shown with an arrow instead of an
            # address.
            nest_count = nest_count - 1
            arrow = "v------>"
            if symbol.ARCH in self._WIDE_ARCHS:
              arrow = "v-------------->"
            self.trace_lines.append((arrow, source_symbol, source_location))
          else:
            if not object_symbol_with_offset:
              object_symbol_with_offset = source_symbol
            self.trace_lines.append((code_addr,
                                     object_symbol_with_offset,
                                     source_location))
    if self.code_line.match(line):
      # Code lines should be ignored. If this were excluded the 'code around'
      # sections would trigger value_line matches.
      return ret
    match = self.value_line.match(line)
    if match:
      ret = True
      (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
      if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
        self.value_lines.append((addr, value, "", area))
      else:
        info = symbol.SymbolInformation(area, value)
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        if not source_symbol:
          if symbol_present:
            source_symbol = symbol.CallCppFilt(symbol_name)
          else:
            source_symbol = "<unknown>"
        if not source_location:
          source_location = area
        if not object_symbol_with_offset:
          object_symbol_with_offset = source_symbol
        self.value_lines.append((addr,
                                 value,
                                 object_symbol_with_offset,
                                 source_location))

    return ret
418
419
class RegisterPatternTests(unittest.TestCase):
  """Checks TraceConverter.register_line against the example crashes."""

  def assert_register_matches(self, abi, example_crash, stupid_pattern):
    """Assert that register_line matches exactly the register dump lines.

    Every line of the example is also run through ProcessLine, and the
    accumulated output is printed at the end.

    Args:
      abi: name of the ABI under test. Not read here; the ABI is set from the
          dump itself through symbol.SetAbi.
      example_crash: full text of an example crash dump.
      stupid_pattern: simple regex that matches a line of the dump exactly
          when that line is a register line.
    """
    tc = TraceConverter()
    lines = example_crash.split('\n')
    symbol.SetAbi(lines)
    tc.UpdateAbiRegexes()
    for line in lines:
      tc.ProcessLine(line)
      is_register = (re.search(stupid_pattern, line) is not None)
      matched = (tc.register_line.search(line) is not None)
      # assertEqual rather than the deprecated assertEquals alias.
      self.assertEqual(matched, is_register, line)
    tc.PrintOutput(tc.trace_lines, tc.value_lines)

  def test_arm_registers(self):
    self.assert_register_matches("arm", example_crashes.arm, r'\b(r0|r4|r8|ip)\b')

  def test_arm64_registers(self):
    self.assert_register_matches("arm64", example_crashes.arm64, r'\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\b')

  def test_mips_registers(self):
    self.assert_register_matches("mips", example_crashes.mips, r'\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\b')

  def test_mips64_registers(self):
    self.assert_register_matches("mips64", example_crashes.mips64, r'\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\b')

  def test_x86_registers(self):
    self.assert_register_matches("x86", example_crashes.x86, r'\b(eax|esi|xcs|eip)\b')

  def test_x86_64_registers(self):
    self.assert_register_matches("x86_64", example_crashes.x86_64, r'\b(rax|rsi|r8|r12|cs|rip)\b')
450
451
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
  unittest.main()
454