1#!/usr/bin/env python 2# 3# Copyright (C) 2013 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""stack symbolizes native crash dumps.""" 18 19import os 20import re 21import subprocess 22import symbol 23import tempfile 24import unittest 25 26import example_crashes 27 28def ConvertTrace(lines): 29 tracer = TraceConverter() 30 print "Reading symbols from", symbol.SYMBOLS_DIR 31 tracer.ConvertTrace(lines) 32 33class TraceConverter: 34 process_info_line = re.compile("(pid: [0-9]+, tid: [0-9]+.*)") 35 revision_line = re.compile("(Revision: \'(.*)\')") 36 signal_line = re.compile("(signal [0-9]+ \(.*\).*)") 37 abort_message_line = re.compile("(Abort message: '.*')") 38 thread_line = re.compile("(.*)(\-\-\- ){15}\-\-\-") 39 dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)") 40 dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)") 41 register_line = re.compile("$a") 42 trace_line = re.compile("$a") 43 sanitizer_trace_line = re.compile("$a") 44 value_line = re.compile("$a") 45 code_line = re.compile("$a") 46 zipinfo_central_directory_line = re.compile("Central\s+directory\s+entry") 47 zipinfo_central_info_match = re.compile( 48 "^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)" 49 ".*^\s*compressed size:\s+(\d+)", re.M | re.S) 50 unreachable_line = re.compile("((\d+ bytes in \d+ unreachable allocations)|"+\ 51 "(\d+ bytes unreachable at [0-9a-f]+)|"+\ 52 "(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"+\ 53 "(and \d+ similar unreachable bytes in \d+ allocation(s)?))") 54 trace_lines = [] 55 value_lines = [] 56 last_frame = -1 57 width = "{8}" 58 spacing = "" 59 apk_info = dict() 60 61 register_names = { 62 "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr", 63 "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate", 64 "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc", 65 "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc", 66 "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags", 67 "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags", 68 } 69 70 def UpdateAbiRegexes(self): 71 if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64": 72 self.width = "{16}" 73 self.spacing = " " 74 else: 75 self.width = "{8}" 76 self.spacing = "" 77 78 self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){2,5})") 79 80 # Note that both trace and value line matching allow for variable amounts of 81 # whitespace (e.g. \t). This is because the we want to allow for the stack 82 # tool to operate on AndroidFeedback provided system logs. AndroidFeedback 83 # strips out double spaces that are found in tombsone files and logcat output. 84 # 85 # Examples of matched trace lines include lines from tombstone files like: 86 # #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so 87 # 88 # Or lines from AndroidFeedback crash report system logs like: 89 # 03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so 90 # Please note the spacing differences. 91 self.trace_line = re.compile( 92 ".*" # Random start stuff. 93 "\#(?P<frame>[0-9]+)" # Frame number. 94 "[ \t]+..[ \t]+" # (space)pc(space). 95 "(?P<offset>[0-9a-f]" + self.width + ")[ \t]+" # Offset (hex number given without 96 # 0x prefix). 97 "(?P<dso>\[[^\]]+\]|[^\r\n \t]*)" # Library name. 98 "( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?" # Offset into the file to find the start of the shared so. 99 "(?P<symbolpresent> \((?P<symbol>.*)\))?") # Is the symbol there? 100 # pylint: disable-msg=C6310 101 # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as 102 # its own regex. Example: 103 # 08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237) 104 self.sanitizer_trace_line = re.compile( 105 ".*" # Random start stuff. 106 "\#(?P<frame>[0-9]+)" # Frame number. 107 "[ \t]+0x[0-9a-f]+[ \t]+" # PC, not interesting to us. 108 "\(" # Opening paren. 109 "(?P<dso>[^+]+)" # Library name. 110 "\+" # '+' 111 "0x(?P<offset>[0-9a-f]+)" # Offset (hex number given with 112 # 0x prefix). 113 "\)") # Closin paren. 114 # pylint: disable-msg=C6310 115 # Examples of matched value lines include: 116 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so 117 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol) 118 # 03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so 119 # Again, note the spacing differences. 120 self.value_line = re.compile("(.*)([0-9a-f]" + self.width + ")[ \t]+([0-9a-f]" + self.width + ")[ \t]+([^\r\n \t]*)( \((.*)\))?") 121 # Lines from 'code around' sections of the output will be matched before 122 # value lines because otheriwse the 'code around' sections will be confused as 123 # value lines. 124 # 125 # Examples include: 126 # 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8 127 # 03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8 128 self.code_line = re.compile("(.*)[ \t]*[a-f0-9]" + self.width + 129 "[ \t]*[a-f0-9]" + self.width + 130 "[ \t]*[a-f0-9]" + self.width + 131 "[ \t]*[a-f0-9]" + self.width + 132 "[ \t]*[a-f0-9]" + self.width + 133 "[ \t]*[ \r\n]") # pylint: disable-msg=C6310 134 135 def CleanLine(self, ln): 136 # AndroidFeedback adds zero width spaces into its crash reports. These 137 # should be removed or the regular expresssions will fail to match. 138 return unicode(ln, errors='ignore') 139 140 def PrintTraceLines(self, trace_lines): 141 """Print back trace.""" 142 maxlen = max(map(lambda tl: len(tl[1]), trace_lines)) 143 print 144 print "Stack Trace:" 145 print " RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE" 146 for tl in self.trace_lines: 147 (addr, symbol_with_offset, location) = tl 148 print " %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location) 149 return 150 151 def PrintValueLines(self, value_lines): 152 """Print stack data values.""" 153 maxlen = max(map(lambda tl: len(tl[2]), self.value_lines)) 154 print 155 print "Stack Data:" 156 print " ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE" 157 for vl in self.value_lines: 158 (addr, value, symbol_with_offset, location) = vl 159 print " %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location) 160 return 161 162 def PrintOutput(self, trace_lines, value_lines): 163 if self.trace_lines: 164 self.PrintTraceLines(self.trace_lines) 165 if self.value_lines: 166 self.PrintValueLines(self.value_lines) 167 168 def PrintDivider(self): 169 print 170 print "-----------------------------------------------------\n" 171 172 def DeleteApkTmpFiles(self): 173 for _, _, tmp_files in self.apk_info.values(): 174 for tmp_file in tmp_files.values(): 175 os.unlink(tmp_file) 176 177 def ConvertTrace(self, lines): 178 lines = map(self.CleanLine, lines) 179 try: 180 if not symbol.ARCH: 181 symbol.SetAbi(lines) 182 self.UpdateAbiRegexes() 183 for line in lines: 184 self.ProcessLine(line) 185 self.PrintOutput(self.trace_lines, self.value_lines) 186 finally: 187 # Delete any temporary files created while processing the lines. 188 self.DeleteApkTmpFiles() 189 190 def MatchTraceLine(self, line): 191 if self.trace_line.match(line): 192 match = self.trace_line.match(line) 193 return {"frame": match.group("frame"), 194 "offset": match.group("offset"), 195 "so_offset": match.group("so_offset"), 196 "dso": match.group("dso"), 197 "symbol_present": bool(match.group("symbolpresent")), 198 "symbol_name": match.group("symbol")} 199 if self.sanitizer_trace_line.match(line): 200 match = self.sanitizer_trace_line.match(line) 201 return {"frame": match.group("frame"), 202 "offset": match.group("offset"), 203 "so_offset": None, 204 "dso": match.group("dso"), 205 "symbol_present": False, 206 "symbol_name": None} 207 return None 208 209 def ExtractLibFromApk(self, apk, shared_lib_name): 210 # Create a temporary file containing the shared library from the apk. 211 tmp_file = None 212 try: 213 tmp_fd, tmp_file = tempfile.mkstemp() 214 if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0: 215 os.close(tmp_fd) 216 shared_file = tmp_file 217 tmp_file = None 218 return shared_file 219 finally: 220 if tmp_file: 221 os.close(tmp_fd) 222 os.unlink(tmp_file) 223 return None 224 225 def ProcessCentralInfo(self, offset_list, central_info): 226 match = self.zipinfo_central_info_match.search(central_info) 227 if not match: 228 raise Exception("Cannot find all info from zipinfo\n" + central_info) 229 name = match.group(1) 230 start = int(match.group(2)) 231 end = start + int(match.group(3)) 232 233 offset_list.append([name, start, end]) 234 return name, start, end 235 236 def GetLibFromApk(self, apk, offset): 237 # Convert the string to hex. 238 offset = int(offset, 16) 239 240 # Check if we already have information about this offset. 241 if apk in self.apk_info: 242 apk_full_path, offset_list, tmp_files = self.apk_info[apk] 243 for file_name, start, end in offset_list: 244 if offset >= start and offset < end: 245 if file_name in tmp_files: 246 return file_name, tmp_files[file_name] 247 tmp_file = self.ExtractLibFromApk(apk_full_path, file_name) 248 if tmp_file: 249 tmp_files[file_name] = tmp_file 250 return file_name, tmp_file 251 break 252 return None, None 253 254 if not "ANDROID_PRODUCT_OUT" in os.environ: 255 print "ANDROID_PRODUCT_OUT environment variable not set." 256 return None, None 257 out_dir = os.environ["ANDROID_PRODUCT_OUT"] 258 if not os.path.exists(out_dir): 259 print "ANDROID_PRODUCT_OUT " + out_dir + " does not exist." 260 return None, None 261 if apk.startswith("/"): 262 apk_full_path = out_dir + apk 263 else: 264 apk_full_path = os.path.join(out_dir, apk) 265 if not os.path.exists(apk_full_path): 266 print "Cannot find apk " + apk; 267 return None, None 268 269 cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE) 270 # Find the first central info marker. 271 for line in cmd.stdout: 272 if self.zipinfo_central_directory_line.search(line): 273 break 274 275 central_info = "" 276 file_name = None 277 offset_list = [] 278 for line in cmd.stdout: 279 match = self.zipinfo_central_directory_line.search(line) 280 if match: 281 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info) 282 if not file_name and offset >= start and offset < end: 283 file_name = cur_name 284 central_info = "" 285 else: 286 central_info += line 287 if central_info: 288 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info) 289 if not file_name and offset >= start and offset < end: 290 file_name = cur_name 291 292 # Save the information from the zip. 293 tmp_files = dict() 294 self.apk_info[apk] = [apk_full_path, offset_list, tmp_files] 295 if not file_name: 296 return None, None 297 tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name) 298 if tmp_shared_lib: 299 tmp_files[file_name] = tmp_shared_lib 300 return file_name, tmp_shared_lib 301 return None, None 302 303 def ProcessLine(self, line): 304 ret = False 305 process_header = self.process_info_line.search(line) 306 signal_header = self.signal_line.search(line) 307 abort_message_header = self.abort_message_line.search(line) 308 thread_header = self.thread_line.search(line) 309 register_header = self.register_line.search(line) 310 revision_header = self.revision_line.search(line) 311 dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line) 312 dalvik_native_thread_header = self.dalvik_native_thread_line.search(line) 313 unreachable_header = self.unreachable_line.search(line) 314 if process_header or signal_header or abort_message_header or thread_header or \ 315 register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \ 316 revision_header or unreachable_header: 317 ret = True 318 if self.trace_lines or self.value_lines: 319 self.PrintOutput(self.trace_lines, self.value_lines) 320 self.PrintDivider() 321 self.trace_lines = [] 322 self.value_lines = [] 323 self.last_frame = -1 324 if process_header: 325 print process_header.group(1) 326 if signal_header: 327 print signal_header.group(1) 328 if abort_message_header: 329 print abort_message_header.group(1) 330 if register_header: 331 print register_header.group(1) 332 if thread_header: 333 print thread_header.group(1) 334 if dalvik_jni_thread_header: 335 print dalvik_jni_thread_header.group(1) 336 if dalvik_native_thread_header: 337 print dalvik_native_thread_header.group(1) 338 if revision_header: 339 print revision_header.group(1) 340 if unreachable_header: 341 print unreachable_header.group(1) 342 return True 343 trace_line_dict = self.MatchTraceLine(line) 344 if trace_line_dict is not None: 345 ret = True 346 frame = int(trace_line_dict["frame"]) 347 code_addr = trace_line_dict["offset"] 348 area = trace_line_dict["dso"] 349 so_offset = trace_line_dict["so_offset"] 350 symbol_present = trace_line_dict["symbol_present"] 351 symbol_name = trace_line_dict["symbol_name"] 352 353 if frame <= self.last_frame and (self.trace_lines or self.value_lines): 354 self.PrintOutput(self.trace_lines, self.value_lines) 355 self.PrintDivider() 356 self.trace_lines = [] 357 self.value_lines = [] 358 self.last_frame = frame 359 360 if area == "<unknown>" or area == "[heap]" or area == "[stack]": 361 self.trace_lines.append((code_addr, "", area)) 362 else: 363 # If this is an apk, it usually means that there is actually 364 # a shared so that was loaded directly out of it. In that case, 365 # extract the shared library and the name of the shared library. 366 lib = None 367 if area.endswith(".apk") and so_offset: 368 lib_name, lib = self.GetLibFromApk(area, so_offset) 369 if not lib: 370 lib = area 371 lib_name = None 372 373 # If a calls b which further calls c and c is inlined to b, we want to 374 # display "a -> b -> c" in the stack trace instead of just "a -> c" 375 info = symbol.SymbolInformation(lib, code_addr) 376 nest_count = len(info) - 1 377 for (source_symbol, source_location, object_symbol_with_offset) in info: 378 if not source_symbol: 379 if symbol_present: 380 source_symbol = symbol.CallCppFilt(symbol_name) 381 else: 382 source_symbol = "<unknown>" 383 if not source_location: 384 source_location = area 385 if lib_name: 386 source_location += "(" + lib_name + ")" 387 if nest_count > 0: 388 nest_count = nest_count - 1 389 arrow = "v------>" 390 if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64": 391 arrow = "v-------------->" 392 self.trace_lines.append((arrow, source_symbol, source_location)) 393 else: 394 if not object_symbol_with_offset: 395 object_symbol_with_offset = source_symbol 396 self.trace_lines.append((code_addr, 397 object_symbol_with_offset, 398 source_location)) 399 if self.code_line.match(line): 400 # Code lines should be ignored. If this were exluded the 'code around' 401 # sections would trigger value_line matches. 402 return ret 403 if self.value_line.match(line): 404 ret = True 405 match = self.value_line.match(line) 406 (unused_, addr, value, area, symbol_present, symbol_name) = match.groups() 407 if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area: 408 self.value_lines.append((addr, value, "", area)) 409 else: 410 info = symbol.SymbolInformation(area, value) 411 (source_symbol, source_location, object_symbol_with_offset) = info.pop() 412 # If there is no information, skip this. 413 if source_symbol or source_location or object_symbol_with_offset: 414 if not source_symbol: 415 if symbol_present: 416 source_symbol = symbol.CallCppFilt(symbol_name) 417 else: 418 source_symbol = "<unknown>" 419 if not source_location: 420 source_location = area 421 if not object_symbol_with_offset: 422 object_symbol_with_offset = source_symbol 423 self.value_lines.append((addr, 424 value, 425 object_symbol_with_offset, 426 source_location)) 427 428 return ret 429 430 431class RegisterPatternTests(unittest.TestCase): 432 def assert_register_matches(self, abi, example_crash, stupid_pattern): 433 tc = TraceConverter() 434 lines = example_crash.split('\n') 435 symbol.SetAbi(lines) 436 tc.UpdateAbiRegexes() 437 for line in lines: 438 tc.ProcessLine(line) 439 is_register = (re.search(stupid_pattern, line) is not None) 440 matched = (tc.register_line.search(line) is not None) 441 self.assertEquals(matched, is_register, line) 442 tc.PrintOutput(tc.trace_lines, tc.value_lines) 443 444 def test_arm_registers(self): 445 self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b') 446 447 def test_arm64_registers(self): 448 self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b') 449 450 def test_mips_registers(self): 451 self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b') 452 453 def test_mips64_registers(self): 454 self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b') 455 456 def test_x86_registers(self): 457 self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b') 458 459 def test_x86_64_registers(self): 460 self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b') 461 462class LibmemunreachablePatternTests(unittest.TestCase): 463 def test_libmemunreachable(self): 464 tc = TraceConverter() 465 lines = example_crashes.libmemunreachable.split('\n') 466 467 symbol.SetAbi(lines) 468 self.assertEquals(symbol.ARCH, "arm") 469 470 tc.UpdateAbiRegexes() 471 header_lines = 0 472 trace_lines = 0 473 for line in lines: 474 tc.ProcessLine(line) 475 if re.search(tc.unreachable_line, line) is not None: 476 header_lines += 1 477 if tc.MatchTraceLine(line) is not None: 478 trace_lines += 1 479 self.assertEquals(header_lines, 3) 480 self.assertEquals(trace_lines, 2) 481 tc.PrintOutput(tc.trace_lines, tc.value_lines) 482 483class LongASANStackTests(unittest.TestCase): 484 # Test that a long ASAN-style (non-padded frame numbers) stack trace is not split into two 485 # when the frame number becomes two digits. This happened before as the frame number was 486 # handled as a string and not converted to an integral. 487 def test_long_asan_crash(self): 488 tc = TraceConverter() 489 lines = example_crashes.long_asan_crash.splitlines() 490 symbol.SetAbi(lines) 491 tc.UpdateAbiRegexes() 492 # Test by making sure trace_line_count is monotonically non-decreasing. If the stack trace 493 # is split, a separator is printed and trace_lines is flushed. 494 trace_line_count = 0 495 for line in lines: 496 tc.ProcessLine(line) 497 self.assertLessEqual(trace_line_count, len(tc.trace_lines)) 498 trace_line_count = len(tc.trace_lines) 499 # The split happened at transition of frame #9 -> #10. Make sure we have parsed (and stored) 500 # more than ten frames. 501 self.assertGreater(trace_line_count, 10) 502 tc.PrintOutput(tc.trace_lines, tc.value_lines) 503 504class ValueLinesTest(unittest.TestCase): 505 def test_value_line_skipped(self): 506 tc = TraceConverter() 507 symbol.SetAbi(["ABI: 'arm'"]) 508 tc.UpdateAbiRegexes() 509 tc.ProcessLine(" 12345678 00001000 .") 510 self.assertEqual([], tc.value_lines) 511 512if __name__ == '__main__': 513 unittest.main() 514