1#!/usr/bin/python
2
3"""Disassemble the code stored in a tombstone.
4
5The classes in this module use an interface, ProcessLine, so that they can be
chained together to do arbitrary processing. The current classes support
7disassembling the bytes embedded in tombstones and printing output to stdout.
8"""
9
10
11import re
12import subprocess
13import sys
14import tempfile
15import architecture
16
17
# Assembly text written at the top of the scratch file before the code bytes:
# it declares _start as a global function symbol and opens its label.
STANDARD_PROLOGUE = """
       .type   _start, %function
       .globl  _start
_start:
"""
23
24
# STANDARD_PROLOGUE followed by directives that switch to 16-bit Thumb code and
# open a thumb_start function label. ProcessCodeBlock uses it when the ABI is
# 'arm' and bit 0x20 of cpsr is set.
THUMB_PROLOGUE = STANDARD_PROLOGUE + """
       .code   16
       .thumb_func
       .type   thumb_start, %function
thumb_start:
"""
31
32
def Disassemble(line_generator):
  """Yield the lines of a tombstone, adding disassembly for its code blocks.

  Every input line is yielded unchanged except for the raw memory words of a
  code block, which ProcessCodeBlock consumes and replaces with disassembler
  output. Nothing after the header is examined until a line containing
  "ABI: '<abi>'" has been seen, because the ABI selects the toolchain.

  Args:
    line_generator: iterable of tombstone lines (a file object, a generator,
      or any other iterable such as a list).

  Yields:
    The output lines, in order.
  """
  # The two loops below and the helpers they call must all draw from a single
  # shared iterator. Without this, a re-iterable argument such as a list would
  # restart from its first line in every loop and duplicate the output.
  line_generator = iter(line_generator)
  abi_line = re.compile(r"(ABI: '(.*)')")
  abi = None
  tools = None
  # Process global headers
  for line in line_generator:
    yield line
    abi_header = abi_line.search(line)
    if abi_header:
      abi = abi_header.group(2)
      # Look up the tools here so we don't do a lookup for each code block.
      tools = architecture.Architecture(abi)
      break
  # The rest of the file consists of:
  #   o Lines that should pass through unchanged
  #   o Blocks of register values, which follow a 'pid: ...' line and end with
  #     'backtrace:' line
  #   o Blocks of code represented as words, which start with 'code around ...'
  #     and end with a line that doesn't look like a list of words.
  #
  # The only constraint on the ordering of these blocks is that the register
  # values must come before the first code block.
  #
  # It's easiest to nest register processing in the codeblock search loop.
  register_list_re = re.compile('^pid: ')
  # Note that '^' anchors only the first alternative; 'memory near pc' may
  # match anywhere in the line.
  codeblock_re = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
  register_text = {}
  for line in line_generator:
    yield line
    if register_list_re.search(line):
      # A new register dump replaces whatever was recorded before it.
      register_text = {}
      for output in ProcessRegisterList(line_generator, register_text):
        yield output
    code_match = codeblock_re.search(line)
    if code_match:
      # Exactly one of the two groups matched; the other contributes ''.
      code_reg = ''.join(code_match.groups(''))
      for output in ProcessCodeBlock(
          abi, tools, code_reg, register_text, line_generator):
        yield output
72
73
def ProcessRegisterList(line_generator, rval):
  """Pass a register dump through while recording its contents in rval.

  Lines are read from line_generator and yielded unchanged up to and including
  the first one that starts with 'backtrace:'. Every line that starts with a
  space is split on whitespace and treated as alternating register names and
  values, which are stored in rval.

  Args:
    line_generator: iterator over the lines that follow the 'pid: ' line.
    rval: dict that receives the name -> value (text) pairs.

  Yields:
    Each line consumed, unchanged.
  """
  for text in line_generator:
    yield text
    if text.startswith('backtrace:'):
      break
    if not text.startswith(' '):
      continue
    tokens = text.split()
    assert len(tokens) % 2 == 0
    # Even positions hold the register names, odd positions their values.
    names = tokens[0::2]
    values = tokens[1::2]
    for position, name in enumerate(names):
      rval[name] = values[position]
86
87
def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
  """Replace a block of raw code words with its disassembly.

  Lines are consumed from line_generator for as long as their length matches
  a known memory-dump layout. The words on them are written out as '.byte'
  directives, and the result is assembled, linked at the block's start address
  and disassembled with the commands supplied by tools. The dump lines are not
  yielded. The line that ends the block is consumed and replaced by a single
  blank line.

  If no dump line is found at all, nothing is assembled and the consumed line
  (if any) is passed through unchanged.

  Args:
    abi: ABI name from the tombstone header; 'arm' enables the Thumb check and
      any ABI starting with 'arm' enables the object-file fix-up.
    tools: object providing WordToBytes(word), Assemble(args), Link(args) and
      Disassemble(args); the last three return command lines for subprocess.
    register_name: key in register_text of the register whose value is the
      address to mark in the output.
    register_text: dict mapping register names to hexadecimal text.
    line_generator: iterator over the remaining tombstone lines.

  Yields:
    Disassembly lines prefixed with '--->' for the marked address and with
    four spaces otherwise, followed by one newline-only line.

  Raises:
    KeyError: register_name (or 'cpsr' when abi is 'arm') is not recorded.
    subprocess.CalledProcessError: an external tool exited with an error.
  """
  program_counter = register_text[register_name]
  program_counter_val = int(program_counter, 16)
  # ARM code comes in two flavors: arm and thumb. Figure out the one
  # to use by peeking in the cpsr.
  if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
    prologue = THUMB_PROLOGUE
  else:
    prologue = STANDARD_PROLOGUE
  # Handle the 3 different file formats that we've observed.
  if len(program_counter) == 8:
    block_line_len = [67]
    block_num_words = 4
  else:
    assert len(program_counter) == 16
    block_line_len = [57, 73]
    block_num_words = 2

  # Everything registered here is released in the finally clause, so neither a
  # failing tool nor an abandoned generator leaks files or a child process.
  temp_files = []
  disassembler = None
  try:
    # Text mode so that str can be written on both Python 2 and Python 3.
    scratch_file = tempfile.NamedTemporaryFile(mode='w', suffix='.s')
    temp_files.append(scratch_file)
    scratch_file.write(prologue)
    # Retains the hexadecimal text for the start of the block
    start_address = None
    # Maintains a numeric counter for the address of the current byte
    current_address = None
    # The line that ended the block, or None if the input ran out first.
    terminator = None
    # Now generate assembly from the bytes in the code block.
    for line in line_generator:
      # Be conservative and stop interpreting if the line length is wrong
      # We can't count words because spaces can appear in the text
      # representation of the memory.
      if len(line) not in block_line_len:
        terminator = line
        break
      words = line.split()
      # Double check the address at the start of each line
      if current_address is None:
        start_address = words[0]
        current_address = int(start_address, 16)
      else:
        assert current_address == int(words[0], 16)
      for word in words[1:block_num_words+1]:
        # Handle byte swapping
        for byte in tools.WordToBytes(word):
          # Emit a label at the desired program counter.
          # This will cause the disassembler to resynchronize at this point,
          # allowing us to position the arrow and also ensuring that we decode
          # the instruction properly.
          if current_address == program_counter_val:
            scratch_file.write('program_counter_was_here:\n')
          scratch_file.write('  .byte 0x%s\n' % byte)
          current_address += 1

    if start_address is None:
      # No dump lines were recognised, so there is no address to link at and
      # nothing to disassemble. Hand back the line we consumed rather than
      # failing on the missing start address.
      if terminator is not None:
        yield terminator
      return

    scratch_file.flush()
    # Assemble the scratch file and relocate it to the block address with the
    # linker.
    object_file = tempfile.NamedTemporaryFile(suffix='.o')
    temp_files.append(object_file)
    subprocess.check_call(tools.Assemble([
        '-o', object_file.name, scratch_file.name]))
    scratch_file.close()

    # Work around ARM data tagging: rewrite the NUL-delimited string '$d' to
    # '$q' inside the object file.
    # NOTE(review): this was previously described as renaming to '$t', but the
    # byte written is 0x71 ('q') -- confirm which is intended.
    if abi.startswith('arm'):
      subprocess.check_call([
          'sed', '-i', '-e',
          "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/",
          object_file.name])

    linked_file = tempfile.NamedTemporaryFile(suffix='.o')
    temp_files.append(linked_file)
    cmd = tools.Link([
        '-Ttext', '0x' + start_address, '-o', linked_file.name,
        object_file.name])
    subprocess.check_call(cmd)
    object_file.close()
    # universal_newlines gives str lines on both Python 2 and Python 3.
    disassembler = subprocess.Popen(
        tools.Disassemble(['-S', linked_file.name]),
        stdout=subprocess.PIPE, universal_newlines=True)
    # Skip some of the annoying assembler headers.
    emit = False
    start_pattern = start_address + ' '
    # objdump padding varies between 32 bit and 64 bit architectures, so let
    # the character class absorb all leading spaces and zeros instead of
    # demanding a fixed field width.
    arrow_pattern = re.compile('^[ 0]*%x:\t' % program_counter_val)
    for line in disassembler.stdout:
      emit = emit or line.startswith(start_pattern)
      if emit and len(line) > 1 and 'program_counter_was_here' not in line:
        if arrow_pattern.search(line):
          yield '--->' + line
        else:
          yield '    ' + line
  finally:
    if disassembler is not None:
      # Close our end of the pipe first so a still-running child cannot block
      # on a full pipe, then reap it.
      disassembler.stdout.close()
      disassembler.wait()
    # Closing a temporary file twice is harmless, so files already closed
    # above can safely be closed again here.
    for temp_file in temp_files:
      temp_file.close()
  yield '\n'
169
170
def main(argv):
  """Disassemble every tombstone named on the command line to stdout.

  Args:
    argv: full argument vector; argv[0] is skipped and every remaining entry
      is opened as a tombstone file.
  """
  for fn in argv[1:]:
    # Close each tombstone as soon as it has been processed rather than
    # leaving the handle open until garbage collection.
    with open(fn, 'r') as tombstone:
      for line in Disassemble(tombstone):
        # Lines already end with their own newline, so write them verbatim.
        sys.stdout.write(line)
175
176
# When run as a script, process the tombstone files named on the command line.
if __name__ == '__main__':
  main(sys.argv)
179