1#!/usr/bin/env python3
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
19import os
20import re
21import subprocess
22import symbol
23import tempfile
24import unittest
25
26import example_crashes
27
def ConvertTrace(lines):
  """Symbolize the crash dump contained in `lines` and print it to stdout.

  Args:
    lines: iterable of text lines making up the crash dump.
  """
  converter = TraceConverter()
  print("Reading symbols from", symbol.SYMBOLS_DIR)
  converter.ConvertTrace(lines)
32
33class TraceConverter:
34  process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
35  revision_line = re.compile(r"(Revision: '(.*)')")
36  signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
37  abort_message_line = re.compile(r"(Abort message: '.*')")
38  thread_line = re.compile(r"(.*)(--- ){15}---")
39  dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
40  dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")
41  register_line = re.compile("$a")
42  trace_line = re.compile("$a")
43  sanitizer_trace_line = re.compile("$a")
44  value_line = re.compile("$a")
45  code_line = re.compile("$a")
46  zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
47  zipinfo_central_info_match = re.compile(
48      r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
49      r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
50  unreachable_line = re.compile(r"((\d+ bytes in \d+ unreachable allocations)|"
51                                r"(\d+ bytes unreachable at [0-9a-f]+)|"
52                                r"(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"
53                                r"(and \d+ similar unreachable bytes in \d+ allocation(s)?))")
54  trace_lines = []
55  value_lines = []
56  last_frame = -1
57  width = "{8}"
58  spacing = ""
59  apk_info = dict()
60
61  register_names = {
62    "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
63    "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
64    "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
65    "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
66    "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
67    "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
68  }
69
70  def UpdateAbiRegexes(self):
71    if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64":
72      self.width = "{16}"
73      self.spacing = "        "
74    else:
75      self.width = "{8}"
76      self.spacing = ""
77
78    self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){2,5})")
79
80    # Note that both trace and value line matching allow for variable amounts of
81    # whitespace (e.g. \t). This is because the we want to allow for the stack
82    # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
83    # strips out double spaces that are found in tombsone files and logcat output.
84    #
85    # Examples of matched trace lines include lines from tombstone files like:
86    #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so
87    #
88    # Or lines from AndroidFeedback crash report system logs like:
89    #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
90    # Please note the spacing differences.
91    self.trace_line = re.compile(
92        r".*"                                                 # Random start stuff.
93        r"\#(?P<frame>[0-9]+)"                                # Frame number.
94        r"[ \t]+..[ \t]+"                                     # (space)pc(space).
95        r"(?P<offset>[0-9a-f]" + self.width + ")[ \t]+"       # Offset (hex number given without
96                                                              #         0x prefix).
97        r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"                    # Library name.
98        r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"       # Offset into the file to find the start of the shared so.
99        r"(?P<symbolpresent> \((?P<symbol>.*)\))?")           # Is the symbol there?
100                                                              # pylint: disable-msg=C6310
101    # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
102    # its own regex. Example:
103    # 08-19 05:29:26.283   397   403 I         :     #0 0xb6a15237  (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
104    self.sanitizer_trace_line = re.compile(
105        r".*"                                                 # Random start stuff.
106        r"\#(?P<frame>[0-9]+)"                                # Frame number.
107        r"[ \t]+0x[0-9a-f]+[ \t]+"                            # PC, not interesting to us.
108        r"\("                                                 # Opening paren.
109        r"(?P<dso>[^+]+)"                                     # Library name.
110        r"\+"                                                 # '+'
111        r"0x(?P<offset>[0-9a-f]+)"                            # Offset (hex number given with
112                                                              #         0x prefix).
113        r"\)")                                                # Closing paren.
114                                                              # pylint: disable-msg=C6310
115    # Examples of matched value lines include:
116    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so
117    #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so (symbol)
118    #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
119    # Again, note the spacing differences.
120    self.value_line = re.compile(r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
121    # Lines from 'code around' sections of the output will be matched before
122    # value lines because otheriwse the 'code around' sections will be confused as
123    # value lines.
124    #
125    # Examples include:
126    #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
127    #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
128    self.code_line = re.compile(r"(.*)[ \t]*[a-f0-9]" + self.width +
129                                r"[ \t]*[a-f0-9]" + self.width +
130                                r"[ \t]*[a-f0-9]" + self.width +
131                                r"[ \t]*[a-f0-9]" + self.width +
132                                r"[ \t]*[a-f0-9]" + self.width +
133                                r"[ \t]*[ \r\n]")  # pylint: disable-msg=C6310
134
135  def CleanLine(self, ln):
136    # AndroidFeedback adds zero width spaces into its crash reports. These
137    # should be removed or the regular expresssions will fail to match.
138    return ln.encode().decode(encoding='utf8', errors='ignore')
139
140  def PrintTraceLines(self, trace_lines):
141    """Print back trace."""
142    maxlen = max(len(tl[1]) for tl in trace_lines)
143    print("\nStack Trace:")
144    print("  RELADDR   " + self.spacing + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
145    for tl in self.trace_lines:
146      (addr, symbol_with_offset, location) = tl
147      print("  %8s  %s  %s" % (addr, symbol_with_offset.ljust(maxlen), location))
148
149  def PrintValueLines(self, value_lines):
150    """Print stack data values."""
151    maxlen = max(len(tl[2]) for tl in self.value_lines)
152    print("\nStack Data:")
153    print("  ADDR      " + self.spacing + "VALUE     " + "FUNCTION".ljust(maxlen) + "  FILE:LINE")
154    for vl in self.value_lines:
155      (addr, value, symbol_with_offset, location) = vl
156      print("  %8s  %8s  %s  %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))
157
158  def PrintOutput(self, trace_lines, value_lines):
159    if self.trace_lines:
160      self.PrintTraceLines(self.trace_lines)
161    if self.value_lines:
162      self.PrintValueLines(self.value_lines)
163
164  def PrintDivider(self):
165    print("\n-----------------------------------------------------\n")
166
167  def DeleteApkTmpFiles(self):
168    for _, _, tmp_files in self.apk_info.values():
169      for tmp_file in tmp_files.values():
170        os.unlink(tmp_file)
171
172  def ConvertTrace(self, lines):
173    lines = [self.CleanLine(line) for line in lines]
174    try:
175      if not symbol.ARCH:
176        symbol.SetAbi(lines)
177      self.UpdateAbiRegexes()
178      for line in lines:
179        self.ProcessLine(line)
180      self.PrintOutput(self.trace_lines, self.value_lines)
181    finally:
182      # Delete any temporary files created while processing the lines.
183      self.DeleteApkTmpFiles()
184
185  def MatchTraceLine(self, line):
186    if self.trace_line.match(line):
187      match = self.trace_line.match(line)
188      return {"frame": match.group("frame"),
189              "offset": match.group("offset"),
190              "so_offset": match.group("so_offset"),
191              "dso": match.group("dso"),
192              "symbol_present": bool(match.group("symbolpresent")),
193              "symbol_name": match.group("symbol")}
194    if self.sanitizer_trace_line.match(line):
195      match = self.sanitizer_trace_line.match(line)
196      return {"frame": match.group("frame"),
197              "offset": match.group("offset"),
198              "so_offset": None,
199              "dso": match.group("dso"),
200              "symbol_present": False,
201              "symbol_name": None}
202    return None
203
204  def ExtractLibFromApk(self, apk, shared_lib_name):
205    # Create a temporary file containing the shared library from the apk.
206    tmp_file = None
207    try:
208      tmp_fd, tmp_file = tempfile.mkstemp()
209      if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0:
210        os.close(tmp_fd)
211        shared_file = tmp_file
212        tmp_file = None
213        return shared_file
214    finally:
215      if tmp_file:
216        os.close(tmp_fd)
217        os.unlink(tmp_file)
218    return None
219
220  def ProcessCentralInfo(self, offset_list, central_info):
221    match = self.zipinfo_central_info_match.search(central_info)
222    if not match:
223      raise Exception("Cannot find all info from zipinfo\n" + central_info)
224    name = match.group(1)
225    start = int(match.group(2))
226    end = start + int(match.group(3))
227
228    offset_list.append([name, start, end])
229    return name, start, end
230
231  def GetLibFromApk(self, apk, offset):
232    # Convert the string to hex.
233    offset = int(offset, 16)
234
235    # Check if we already have information about this offset.
236    if apk in self.apk_info:
237      apk_full_path, offset_list, tmp_files = self.apk_info[apk]
238      for file_name, start, end in offset_list:
239        if offset >= start and offset < end:
240          if file_name in tmp_files:
241            return file_name, tmp_files[file_name]
242          tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
243          if tmp_file:
244            tmp_files[file_name] = tmp_file
245            return file_name, tmp_file
246          break
247      return None, None
248
249    if not "ANDROID_PRODUCT_OUT" in os.environ:
250      print("ANDROID_PRODUCT_OUT environment variable not set.")
251      return None, None
252    out_dir = os.environ["ANDROID_PRODUCT_OUT"]
253    if not os.path.exists(out_dir):
254      print("ANDROID_PRODUCT_OUT", out_dir, "does not exist.")
255      return None, None
256    if apk.startswith("/"):
257      apk_full_path = out_dir + apk
258    else:
259      apk_full_path = os.path.join(out_dir, apk)
260    if not os.path.exists(apk_full_path):
261      print("Cannot find apk", apk)
262      return None, None
263
264    cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE)
265    # Find the first central info marker.
266    for line in cmd.stdout:
267      if self.zipinfo_central_directory_line.search(line):
268        break
269
270    central_info = ""
271    file_name = None
272    offset_list = []
273    for line in cmd.stdout:
274      match = self.zipinfo_central_directory_line.search(line)
275      if match:
276        cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
277        if not file_name and offset >= start and offset < end:
278          file_name = cur_name
279        central_info = ""
280      else:
281        central_info += line
282    if central_info:
283      cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
284      if not file_name and offset >= start and offset < end:
285        file_name = cur_name
286
287    # Save the information from the zip.
288    tmp_files = dict()
289    self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
290    if not file_name:
291      return None, None
292    tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
293    if tmp_shared_lib:
294      tmp_files[file_name] = tmp_shared_lib
295      return file_name, tmp_shared_lib
296    return None, None
297
298  def ProcessLine(self, line):
299    ret = False
300    process_header = self.process_info_line.search(line)
301    signal_header = self.signal_line.search(line)
302    abort_message_header = self.abort_message_line.search(line)
303    thread_header = self.thread_line.search(line)
304    register_header = self.register_line.search(line)
305    revision_header = self.revision_line.search(line)
306    dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
307    dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)
308    unreachable_header = self.unreachable_line.search(line)
309    if process_header or signal_header or abort_message_header or thread_header or \
310        register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \
311        revision_header or unreachable_header:
312      ret = True
313      if self.trace_lines or self.value_lines:
314        self.PrintOutput(self.trace_lines, self.value_lines)
315        self.PrintDivider()
316        self.trace_lines = []
317        self.value_lines = []
318        self.last_frame = -1
319      if process_header:
320        print(process_header.group(1))
321      if signal_header:
322        print(signal_header.group(1))
323      if abort_message_header:
324        print(abort_message_header.group(1))
325      if register_header:
326        print(register_header.group(1))
327      if thread_header:
328        print(thread_header.group(1))
329      if dalvik_jni_thread_header:
330        print(dalvik_jni_thread_header.group(1))
331      if dalvik_native_thread_header:
332        print(dalvik_native_thread_header.group(1))
333      if revision_header:
334        print(revision_header.group(1))
335      if unreachable_header:
336        print(unreachable_header.group(1))
337      return True
338    trace_line_dict = self.MatchTraceLine(line)
339    if trace_line_dict is not None:
340      ret = True
341      frame = int(trace_line_dict["frame"])
342      code_addr = trace_line_dict["offset"]
343      area = trace_line_dict["dso"]
344      so_offset = trace_line_dict["so_offset"]
345      symbol_present = trace_line_dict["symbol_present"]
346      symbol_name = trace_line_dict["symbol_name"]
347
348      if frame <= self.last_frame and (self.trace_lines or self.value_lines):
349        self.PrintOutput(self.trace_lines, self.value_lines)
350        self.PrintDivider()
351        self.trace_lines = []
352        self.value_lines = []
353      self.last_frame = frame
354
355      if area == "<unknown>" or area == "[heap]" or area == "[stack]":
356        self.trace_lines.append((code_addr, "", area))
357      else:
358        # If this is an apk, it usually means that there is actually
359        # a shared so that was loaded directly out of it. In that case,
360        # extract the shared library and the name of the shared library.
361        lib = None
362        # The format of the map name:
363        #   Some.apk!libshared.so
364        # or
365        #   Some.apk
366        if so_offset:
367          # If it ends in apk, we are done.
368          apk = None
369          if area.endswith(".apk"):
370            apk = area
371          else:
372            index = area.rfind(".so!")
373            if index != -1:
374              # Sometimes we'll see something like:
375              #   #01 pc abcd  libart.so!libart.so (offset 0x134000)
376              # Remove everything after the ! and zero the offset value.
377              area = area[0:index + 3]
378              so_offset = 0
379            else:
380              index = area.rfind(".apk!")
381              if index != -1:
382                apk = area[0:index + 4]
383          if apk:
384            lib_name, lib = self.GetLibFromApk(apk, so_offset)
385        if not lib:
386          lib = area
387          lib_name = None
388
389        # When using atest, test paths are different between the out/ directory
390        # and device. Apply fixups.
391        if lib.startswith("/data/local/tests/") or lib.startswith("/data/local/tmp/"):
392          test_name = lib.rsplit("/", 1)[-1]
393          prefix = "/data/nativetest"
394          if symbol.ARCH.endswith("64"):
395            prefix += "64"
396          if lib.startswith("/data/local/tests/vendor/"):
397            prefix += "/vendor"
398          lib = prefix + "/" + test_name + "/" + test_name
399
400        # If a calls b which further calls c and c is inlined to b, we want to
401        # display "a -> b -> c" in the stack trace instead of just "a -> c"
402        info = symbol.SymbolInformation(lib, code_addr)
403        nest_count = len(info) - 1
404        for (source_symbol, source_location, object_symbol_with_offset) in info:
405          if not source_symbol:
406            if symbol_present:
407              source_symbol = symbol.CallCppFilt(symbol_name)
408            else:
409              source_symbol = "<unknown>"
410          if not source_location:
411            source_location = area
412            if lib_name:
413              source_location += "(" + lib_name + ")"
414          if nest_count > 0:
415            nest_count = nest_count - 1
416            arrow = "v------>"
417            if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64":
418              arrow = "v-------------->"
419            self.trace_lines.append((arrow, source_symbol, source_location))
420          else:
421            if not object_symbol_with_offset:
422              object_symbol_with_offset = source_symbol
423            self.trace_lines.append((code_addr,
424                                object_symbol_with_offset,
425                                source_location))
426    if self.code_line.match(line):
427      # Code lines should be ignored. If this were exluded the 'code around'
428      # sections would trigger value_line matches.
429      return ret
430    if self.value_line.match(line):
431      ret = True
432      match = self.value_line.match(line)
433      (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
434      if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
435        self.value_lines.append((addr, value, "", area))
436      else:
437        info = symbol.SymbolInformation(area, value)
438        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
439        # If there is no information, skip this.
440        if source_symbol or source_location or object_symbol_with_offset:
441          if not source_symbol:
442            if symbol_present:
443              source_symbol = symbol.CallCppFilt(symbol_name)
444            else:
445              source_symbol = "<unknown>"
446          if not source_location:
447            source_location = area
448          if not object_symbol_with_offset:
449            object_symbol_with_offset = source_symbol
450          self.value_lines.append((addr,
451                                   value,
452                                   object_symbol_with_offset,
453                                   source_location))
454
455    return ret
456
457
class RegisterPatternTests(unittest.TestCase):
  """Checks the per-ABI register regex against the example crash dumps."""

  def assert_register_matches(self, abi, example_crash, stupid_pattern):
    """Assert that register_line matches exactly the lines stupid_pattern does.

    Args:
      abi: name of the ABI under test (not used by the check itself; the ABI
        is detected from the crash text through symbol.SetAbi()).
      example_crash: full crash dump text.
      stupid_pattern: simple regex that identifies the register lines.
    """
    converter = TraceConverter()
    crash_lines = example_crash.split('\n')
    symbol.SetAbi(crash_lines)
    converter.UpdateAbiRegexes()
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      expected = re.search(stupid_pattern, crash_line) is not None
      actual = converter.register_line.search(crash_line) is not None
      self.assertEqual(actual, expected, crash_line)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)

  def test_arm_registers(self):
    pattern = '\\b(r0|r4|r8|ip)\\b'
    self.assert_register_matches("arm", example_crashes.arm, pattern)

  def test_arm64_registers(self):
    pattern = '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b'
    self.assert_register_matches("arm64", example_crashes.arm64, pattern)

  def test_mips_registers(self):
    pattern = '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b'
    self.assert_register_matches("mips", example_crashes.mips, pattern)

  def test_mips64_registers(self):
    pattern = '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b'
    self.assert_register_matches("mips64", example_crashes.mips64, pattern)

  def test_x86_registers(self):
    pattern = '\\b(eax|esi|xcs|eip)\\b'
    self.assert_register_matches("x86", example_crashes.x86, pattern)

  def test_x86_64_registers(self):
    pattern = '\\b(rax|rsi|r8|r12|cs|rip)\\b'
    self.assert_register_matches("x86_64", example_crashes.x86_64, pattern)
488
class LibmemunreachablePatternTests(unittest.TestCase):
  """Checks header and frame detection on libmemunreachable output."""

  def test_libmemunreachable(self):
    converter = TraceConverter()
    report_lines = example_crashes.libmemunreachable.split('\n')

    symbol.SetAbi(report_lines)
    self.assertEqual(symbol.ARCH, "arm")

    converter.UpdateAbiRegexes()
    header_count = 0
    frame_count = 0
    for report_line in report_lines:
      converter.ProcessLine(report_line)
      # Count the unreachable-memory headers and the frames independently of
      # what ProcessLine() did with the line.
      if converter.unreachable_line.search(report_line):
        header_count += 1
      if converter.MatchTraceLine(report_line) is not None:
        frame_count += 1
    self.assertEqual(header_count, 3)
    self.assertEqual(frame_count, 2)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)
509
class LongASANStackTests(unittest.TestCase):
  """Regression test for long ASAN-style stacks.

  ASAN frame numbers are not zero-padded. When frame numbers were compared as
  strings rather than integers, the stack was split in two at the point where
  the frame number gained a second digit.
  """

  def test_long_asan_crash(self):
    converter = TraceConverter()
    crash_lines = example_crashes.long_asan_crash.splitlines()
    symbol.SetAbi(crash_lines)
    converter.UpdateAbiRegexes()
    # A split prints a separator and empties trace_lines, so the number of
    # buffered frames must never go down while the crash is processed.
    previous_count = 0
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      current_count = len(converter.trace_lines)
      self.assertLessEqual(previous_count, current_count)
      previous_count = current_count
    # The split used to happen between frame #9 and frame #10, so more than
    # ten frames must have been parsed and kept.
    self.assertGreater(previous_count, 10)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)
530
class ValueLinesTest(unittest.TestCase):
  """Checks handling of stack data lines that cannot be symbolized."""

  def test_value_line_skipped(self):
    converter = TraceConverter()
    symbol.SetAbi(["ABI: 'arm'"])
    converter.UpdateAbiRegexes()
    # The test expects this value line (mapped to ".") to add nothing.
    converter.ProcessLine("    12345678  00001000  .")
    self.assertEqual([], converter.value_lines)
538
if __name__ == "__main__":
  # Run the unit tests defined in this module with verbose output.
  unittest.main(verbosity=2)
541