1#!/usr/bin/env python
2#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8#===------------------------------------------------------------------------===#
9#
10# HWAddressSanitizer offline symbolization script.
11#
12#===------------------------------------------------------------------------===#
13import glob
14import os
15import re
16import sys
17import string
18import subprocess
19import argparse
20
# Address and pointer tag taken from the most recent tag-mismatch report.
# save_access_address() assigns them; process_stack_history() reads them.
last_access_address = last_access_tag = None
23
class Symbolizer:
  """Front end for one long-lived llvm-symbolizer child process.

  The child is started lazily, on the first query that resolves to an existing
  binary, with the "-inlining" and "-functions" flags. A query is a single
  line written to the child's stdin ("CODE <binary> <addr>" or
  "FRAME <binary> <addr>"); the reply is read line by line from its stdout
  until an empty line (or the end of the stream) is seen.

  Constructor arguments:
    path: llvm-symbolizer executable to run.
    binary_prefixes: directories searched for the binaries named in queries.
    paths_to_cut: literal source path prefixes removed from reported
      file names.
  """

  def __init__(self, path, binary_prefixes, paths_to_cut):
    self.__pipe = None  # subprocess.Popen of llvm-symbolizer, created lazily
    self.__path = path
    self.__binary_prefixes = binary_prefixes
    self.__paths_to_cut = paths_to_cut
    self.__log = False
    self.__warnings = set()  # binary names already reported as missing

  def enable_logging(self, enable):
    """Turn echoing of the symbolizer conversation to stderr on or off."""
    self.__log = enable

  def __open_pipe(self):
    """Start the llvm-symbolizer child process unless one already exists."""
    if not self.__pipe:
      self.__pipe = subprocess.Popen([self.__path, "-inlining", "-functions"],
                                     stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  class __EOF(Exception):
    """Raised by __read when a reply ends (empty line or closed stream)."""

  def __write(self, s):
    """Send one request line to the symbolizer, echoing it when logging."""
    print >>self.__pipe.stdin, s
    if self.__log:
      print >>sys.stderr, ("#>>  |%s|" % (s,))

  def __read(self):
    """Return the next reply line without trailing whitespace.

    Raises:
      Symbolizer.__EOF: the line was empty, which ends the current reply.
    """
    s = self.__pipe.stdout.readline().rstrip()
    if self.__log:
      print >>sys.stderr, ("# << |%s|" % (s,))
    if s == '':
      raise Symbolizer.__EOF
    return s

  def __process_source_path(self, file_name):
    """Shorten a "file:line" string reported by the symbolizer.

    Everything up to and including each configured source prefix is removed,
    sanitizer runtime sources collapse to "[hwasan_rtl]" / "[asan_rtl]", and
    "crtstuff.c:0" becomes "???:0".
    """
    for path_to_cut in self.__paths_to_cut:
      # The prefixes are file system paths, not patterns: escape them so that
      # characters such as '+', '(' or '.' in a directory name match literally.
      file_name = re.sub(".*" + re.escape(path_to_cut), "", file_name)
    # The dot before the extension is escaped so it only matches a real '.'.
    file_name = re.sub(r".*hwasan_[a-z_]*\.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
    file_name = re.sub(r".*asan_[a-z_]*\.(cc|h):[0-9]*", "[asan_rtl]", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name

  def __process_binary_name(self, name):
    """Map a binary path from the report to a file under the symbol dirs.

    Returns:
      The first existing candidate path, or None when nothing is found. A
      missing binary is reported on stderr once per name.
    """
    if name.startswith('/'):
      # Drop the leading slash so os.path.join() keeps the prefix directory.
      name = name[1:]
    # Full relative path first; bare file name only as the last resort.
    for candidate in (name, os.path.basename(name)):
      for prefix in self.__binary_prefixes:
        full_path = os.path.join(prefix, candidate)
        if os.path.exists(full_path):
          return full_path
    if name not in self.__warnings:
      print >>sys.stderr, "Could not find symbols for", name
      self.__warnings.add(name)
    return None

  @staticmethod
  def __parse_optional_int(field):
    """Convert one FRAME reply field to int; '??' means unknown (None)."""
    return None if field == '??' else int(field)

  def iter_locals(self, binary, addr):
    """Yield the local variables reported for the frame at `addr`.

    Sends "FRAME <binary> <addr>" and reads the reply four lines at a time.

    Yields:
      (function_name, file_line, local_name, offset, size, tag_offset)
      tuples; the last three are ints, or None where the symbolizer
      printed '??'. Nothing is yielded when the binary cannot be found.
    """
    binary = self.__process_binary_name(binary)
    if not binary:
      return
    # Only start the child once there is something to ask it.
    self.__open_pipe()
    self.__write("FRAME %s %s" % (binary, addr))
    try:
      while True:
        function_name = self.__read()
        local_name = self.__read()
        file_line = self.__process_source_path(self.__read())
        extra = self.__read().split()

        offset = self.__parse_optional_int(extra[0])
        size = self.__parse_optional_int(extra[1])
        tag_offset = self.__parse_optional_int(extra[2])
        yield (function_name, file_line, local_name, offset, size, tag_offset)
    except Symbolizer.__EOF:
      # An empty line terminates the reply.
      pass

  def iter_call_stack(self, binary, addr):
    """Yield (function_name, file_line) for every frame at `addr`.

    Sends "CODE <binary> <addr>" and reads the reply two lines at a time.
    Nothing is yielded when the binary cannot be found.
    """
    binary = self.__process_binary_name(binary)
    if not binary:
      return
    # Only start the child once there is something to ask it.
    self.__open_pipe()
    self.__write("CODE %s %s" % (binary, addr))
    try:
      while True:
        function_name = self.__read()
        file_line = self.__process_source_path(self.__read())
        yield (function_name, file_line)
    except Symbolizer.__EOF:
      # An empty line terminates the reply.
      pass
119
def symbolize_line(line, symbolizer_path):
  """Print `line`, symbolized when it is a stack frame line.

  A frame line looks like
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
  and is replaced by "<prefix>#<n><spaces><function> in <file:line>",
  followed by one "->" continuation line for each further frame returned
  for that address. Any other line, and any frame line for which no frames
  come back, is printed unchanged apart from stripping trailing whitespace.

  The lookup is done by the module-level `symbolizer` object;
  `symbolizer_path` is accepted but not used.
  """
  match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
  frames = []
  if match:
    frames = list(symbolizer.iter_call_stack(match.group(5), int(match.group(6), 16)))

  if not frames:
    print(line.rstrip().encode('utf-8'))
    return

  prefix, frameno, gap = [match.group(i).encode('utf-8') for i in (1, 2, 3)]
  print("%s#%s%s%s in %s" % (prefix, frameno, gap, frames[0][0], frames[0][1]))

  # Continuation lines are padded so the function name lines up with the
  # column where the address started on the original line.
  indent = ' ' * match.end(1)
  padding = ' ' * (match.start(4) - match.end(1) - 2)
  for function_name, file_line in frames[1:]:
    print("%s->%s%s in %s" % (indent, padding, function_name, file_line))
141
def save_access_address(line):
  """Remember the faulting address and pointer tag found in `line`.

  A "HWAddressSanitizer: tag-mismatch on address 0x..." line updates the
  global last_access_address; a "... of size N at 0x... tags: PP/MM (ptr/mem)"
  line updates the global last_access_tag with the pointer tag PP. Lines
  matching neither pattern leave both globals untouched.
  """
  global last_access_address, last_access_tag

  address_pattern = r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) '
  tags_pattern = r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)'

  address_match = re.match(address_pattern, line, re.UNICODE)
  if address_match is not None:
    last_access_address = int(address_match.group(2), 16)

  tags_match = re.match(tags_pattern, line, re.UNICODE)
  if tags_match is not None:
    last_access_tag = int(tags_match.group(2), 16)
150
def process_stack_history(line, symbolizer, ignore_tags=False):
  """Handle one line of the stack history part of a report.

  For a "record_addr:0x... record:0x... (binary+0x...)" line, the locals of
  that frame are requested from `symbolizer` and every local that contains
  the last recorded access address (and, unless `ignore_tags` is set, whose
  tag matches the last recorded pointer tag) is printed as a
  "Potentially referenced stack object".

  Args:
    line: one line of the report.
    symbolizer: object providing iter_locals(binary, addr).
    ignore_tags: when true, report address matches regardless of tag.

  Returns:
    True when the line was consumed here (the "Previously allocated frames:"
    header or a record line). False otherwise, including while no access
    address and tag have been recorded yet.
  """
  if last_access_address is None or last_access_tag is None:
    # Always return a bool; the original fell through with None here.
    return False
  if re.match(r'Previously allocated frames:', line, re.UNICODE):
    return True
  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
  if not match:
    return False

  record_addr = int(match.group(2), 16)
  record = int(match.group(3), 16)
  binary = match.group(4)
  addr = int(match.group(5), 16)
  # Bits 3..10 of the record's own address form the base tag.
  base_tag = (record_addr >> 3) & 0xFF
  # Bits 48 and up of the record, shifted left by 4, give the frame pointer
  # value used below; only its low 20 bits take part in the comparison.
  fp = (record >> 48) << 4
  fp_mask = (1 << 20) - 1

  for function_name, file_line, local_name, frame_offset, size, tag_offset in symbolizer.iter_locals(binary, addr):
    if frame_offset is None or size is None:
      continue
    obj_offset = (last_access_address - fp - frame_offset) & fp_mask
    if obj_offset >= size:
      continue
    if not ignore_tags and (tag_offset is None or (base_tag ^ tag_offset) != last_access_tag):
      continue
    print('')
    print('Potentially referenced stack object:')
    print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local_name, function_name))
    print('  at %s' % (file_line,))
  return True
186
# Command line interface. Every option now carries help text so that --help
# describes what it does.
parser = argparse.ArgumentParser(
    description='HWAddressSanitizer offline symbolization script. Reads a '
                'report from stdin and prints it symbolized to stdout.')
parser.add_argument('-d', action='store_true',
                    help='log every line exchanged with llvm-symbolizer to stderr')
parser.add_argument('-v', action='store_true',
                    help='print the symbol directories, source path prefixes and '
                         'llvm-symbolizer binary in use before processing input')
parser.add_argument('--ignore-tags', action='store_true',
                    help='report stack objects that match by address even when '
                         'their tag does not match the access tag')
parser.add_argument('--symbols', action='append',
                    help='directory with unstripped binaries (may be repeated); '
                         'defaults to $ANDROID_PRODUCT_OUT/symbols when that '
                         'variable is set')
parser.add_argument('--source', action='append',
                    help='source path prefix to strip from reported file names '
                         '(may be repeated); defaults to the current directory '
                         'and $ANDROID_BUILD_TOP')
parser.add_argument('--symbolizer',
                    help='path to the llvm-symbolizer binary')
# Remaining command line words are collected here; nothing in this script
# reads them.
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args()
196
# Directories holding unstripped binaries: every --symbols value, or, when
# none was given, $ANDROID_PRODUCT_OUT/symbols if that variable is set.
binary_prefixes = args.symbols or []
if not binary_prefixes and 'ANDROID_PRODUCT_OUT' in os.environ:
  product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
  binary_prefixes.append(product_out)

# Refuse to run with a symbols location that is not an existing directory.
for p in binary_prefixes:
  if os.path.isdir(p):
    continue
  print >>sys.stderr, "Symbols path does not exist or is not a directory:", p
  sys.exit(1)
208
# Source path prefixes stripped from reported file names: every --source
# value, or, when none was given, the current directory followed by
# $ANDROID_BUILD_TOP if that variable is set (each with a trailing '/').
paths_to_cut = args.source or []
if not paths_to_cut:
  paths_to_cut += [os.getcwd() + '/']
  if 'ANDROID_BUILD_TOP' in os.environ:
    paths_to_cut += [os.environ['ANDROID_BUILD_TOP'] + '/']
215
# Locate the llvm-symbolizer binary. The first source that yields a value wins:
# 1. the --symbolizer flag
# 2. $LLVM_SYMBOLIZER_PATH, else $HWASAN_SYMBOLIZER_PATH
# 3. a file named "llvm-symbolizer" in the directory of this script
#    (dirname of sys.argv[0])
# 4. inside an Android tree ($ANDROID_BUILD_TOP set), the prebuilt binary at
#    a known path
# 5. the first "llvm-symbolizer" found in $PATH; the versioned
#    "llvm-symbolizer-$VER" names are tried further down
symbolizer_path = args.symbolizer

if not symbolizer_path:
  for env_var in ('LLVM_SYMBOLIZER_PATH', 'HWASAN_SYMBOLIZER_PATH'):
    if env_var in os.environ:
      symbolizer_path = os.environ[env_var]
      break

if not symbolizer_path:
  known_locations = [os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')]
  if 'ANDROID_BUILD_TOP' in os.environ:
    known_locations.append(
        os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer'))
  for s in known_locations:
    if os.path.exists(s):
      symbolizer_path = s
      break

if not symbolizer_path:
  for path in os.environ["PATH"].split(os.pathsep):
    p = os.path.join(path, 'llvm-symbolizer')
    if not os.path.exists(p):
      continue
    symbolizer_path = p
    break
247
def extract_version(s):
  """Return the numeric suffix that follows the last '-' in `s`.

  Used as a sort key for "llvm-symbolizer-<version>" file names.

  Returns:
    The suffix as a float, or 0 when `s` has no '-' or the suffix is not a
    number (for example "14.0.6" or "old"), so such names sort lowest
    instead of aborting the sort with ValueError.
  """
  _, dash, suffix = s.rpartition('-')
  if not dash:
    return 0
  try:
    return float(suffix)
  except ValueError:
    return 0
254
# Last resort: in the first $PATH directory that contains any
# "llvm-symbolizer-*" file, take the one with the highest version suffix.
if not symbolizer_path:
  for path in os.environ["PATH"].split(os.pathsep):
    candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
    if candidates:
      # max() returns the first of equally ranked names, like the stable
      # descending sort it replaces.
      symbolizer_path = max(candidates, key=extract_version)
      break

# Nothing found at all: symbolizer_path is still None (or empty), which
# os.path.exists() cannot take, so report that case on its own.
if not symbolizer_path:
  print >>sys.stderr, "Could not find llvm-symbolizer; pass --symbolizer or set LLVM_SYMBOLIZER_PATH."
  sys.exit(1)

if not os.path.exists(symbolizer_path):
  print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path
  sys.exit(1)
266
# With -v, show the effective configuration followed by a blank line.
if args.v:
  report = ["Looking for symbols in:"]
  report += ["  %s" % (s,) for s in binary_prefixes]
  report += ["Stripping source path prefixes:"]
  report += ["  %s" % (s,) for s in paths_to_cut]
  report += ["Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,)]
  # The empty last entry produces the trailing blank line.
  report += [""]
  print("\n".join(report))
276
# One symbolizer instance serves the whole run; -d turns on its I/O logging.
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
symbolizer.enable_logging(args.d)

# Filter stdin line by line: remember the access address and tag, let the
# stack history handler consume the lines it recognizes, and pass every
# other line through the frame symbolizer.
for line in sys.stdin:
  line = line.decode('utf-8')
  save_access_address(line)
  if not process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
    symbolize_line(line, symbolizer_path)
286