1#!/usr/bin/env python
2# Merge or print the coverage data collected by asan's coverage.
# Input files start with an 8-byte magic header that encodes the PC width,
# followed by a sequence of 32-bit or 64-bit integers (the PCs).
4# We need to merge these integers into a set and then
5# either print them (as hex) or dump them into another file.
6import array
7import bisect
8import glob
9import os.path
10import struct
11import subprocess
12import sys
13
# Name this script was invoked as. It is overwritten with sys.argv[0] when the
# file is run as a script and is used as the prefix of the usage text and of
# the diagnostic messages written to stderr.
prog_name = ""
15
def Usage():
  """Print the command-line synopsis to stderr and terminate the script.

  Raises:
    SystemExit: always, with exit status 1.
  """
  print >> sys.stderr, "Usage: \n" + \
      " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \
      " " + prog_name + " print FILE [FILE...]\n" \
      " " + prog_name + " unpack FILE [FILE...]\n" \
      " " + prog_name + " rawunpack FILE [FILE ...]\n" \
      " " + prog_name + " missing BINARY < LIST_OF_PCS\n"
  # Use sys.exit() instead of the bare exit(): the latter is only injected by
  # the `site` module and is not defined when Python runs with -S.
  sys.exit(1)
24
def CheckBits(bits):
  """Validate a PC width.

  Args:
    bits: PC width in bits; only 32 and 64 are accepted.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  if bits not in (32, 64):
    raise Exception("Wrong bitness: %d" % bits)
28
def TypeCodeForBits(bits):
  """Return a one-letter type code for the given PC width.

  NOTE(review): nothing in this file calls this function; the codes look like
  `array` module type codes ('L' / 'I') -- confirm before relying on it.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    'L' for 64-bit PCs, 'I' for 32-bit PCs.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  if bits == 64:
    return 'L'
  return 'I'
32
def TypeCodeForStruct(bits):
  """Return the `struct` format character for one PC of the given width.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    'Q' for 64-bit PCs, 'I' for 32-bit PCs.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  if bits == 64:
    return 'Q'
  return 'I'
36
# The 8-byte header of a .sancov file is stored as two 32-bit words which
# together form the 64-bit value (kMagicFirstHalf << 32) | kMagicNNSecondHalf.
# The second half selects the PC width used in the rest of the file.
kMagic32SecondHalf = 0xFFFFFF32  # file holds 32-bit PCs
kMagic64SecondHalf = 0xFFFFFF64  # file holds 64-bit PCs
kMagicFirstHalf    = 0xC0BFFFFF  # common to both widths
40
def MagicForBits(bits):
  """Return the file magic as two 32-bit words in on-disk order.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    A two-element list of 32-bit words. On big-endian hosts the first half
    comes first; on little-endian hosts the two words are swapped.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  if bits == 64:
    second_half = kMagic64SecondHalf
  else:
    second_half = kMagic32SecondHalf
  words = [kMagicFirstHalf, second_half]
  if sys.byteorder == 'little':
    words.reverse()
  return words
47
def ReadMagicAndReturnBitness(f, path):
  """Consume the 8-byte magic header from f and return the PC width.

  Args:
    f: binary file object positioned at the start of the header.
    path: file name, used only in the error message.

  Returns:
    32 or 64, depending on which magic value was found.

  Raises:
    Exception: if the header does not match either known magic value.
  """
  words = struct.unpack('II', f.read(8))
  # The two 32-bit words are stored in host order, so the "first half" is the
  # second word on little-endian machines.
  if sys.byteorder == 'little':
    second_half, first_half = words
  else:
    first_half, second_half = words
  if first_half == kMagicFirstHalf:
    if second_half == kMagic64SecondHalf:
      return 64
    if second_half == kMagic32SecondHalf:
      return 32
  raise Exception('Bad magic word in %s' % path)
61
def ReadOneFile(path):
  """Read all PCs stored in one .sancov file.

  The file must start with the 8-byte magic header; the remainder is decoded
  as a sequence of 32-bit or 64-bit unsigned integers as selected by the
  header. Trailing bytes that do not make up a whole PC are ignored.

  Args:
    path: name of the file to read.

  Returns:
    A tuple with the PCs in file order.

  Raises:
    Exception: if the file is shorter than 8 bytes or has a bad magic header.
  """
  with open(path, mode="rb") as f:
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    if size < 8:
      raise Exception('File %s is short (< 8 bytes)' % path)
    bits = ReadMagicAndReturnBitness(f, path)
    size -= 8
    # Explicit floor division keeps the count an integer regardless of the
    # interpreter's division semantics.
    num_pcs = size * 8 // bits
    # A repeat count ("123Q") describes the same layout as "Q" * 123 without
    # building a format string that is as long as the number of PCs.
    fmt = '%d%s' % (num_pcs, TypeCodeForStruct(bits))
    s = struct.unpack_from(fmt, f.read(size))
  print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, num_pcs, bits, path)
  return s
74
def Merge(files):
  """Read every file and return the union of their PCs.

  Args:
    files: list of .sancov file names.

  Returns:
    A sorted list of the distinct PCs found in all files.
  """
  s = set()
  for f in files:
    # Update in place: `s = s.union(...)` would copy the whole accumulated
    # set once per input file.
    s.update(ReadOneFile(f))
  print >> sys.stderr, "%s: %d files merged; %d PCs total" % \
    (prog_name, len(files), len(s))
  return sorted(s)
82
83def PrintFiles(files):
84  if len(files) > 1:
85    s = Merge(files)
86  else:  # If there is just on file, print the PCs in order.
87    s = ReadOneFile(files[0])
88    print >> sys.stderr, "%s: 1 file merged; %d PCs total" % \
89      (prog_name, len(s))
90  for i in s:
91    print "0x%x" % i
92
def MergeAndPrint(files):
  """Merge the given files and write the result to stdout in .sancov format.

  The output uses 64-bit PCs if any merged PC does not fit in 32 bits and
  32-bit PCs otherwise. Binary data is never written to a terminal: if stdout
  is a tty the usage text is printed and the script exits.

  Args:
    files: list of .sancov file names.
  """
  if sys.stdout.isatty():
    Usage()
  s = Merge(files)
  # Guard against an empty merge result (inputs holding only a header):
  # max() of an empty sequence raises ValueError.
  bits = 32
  if s and max(s) > 0xFFFFFFFF:
    bits = 64
  array.array('I', MagicForBits(bits)).tofile(sys.stdout)
  # Repeat-count format: same layout as one format character per PC.
  a = struct.pack('%d%s' % (len(s), TypeCodeForStruct(bits)), *s)
  sys.stdout.write(a)
103
104
def UnpackOneFile(path):
  """Split one packed coverage file into per-module, per-pid .sancov files.

  The packed file is a sequence of records. Each record is a 12-byte header
  (pid as a signed int, module name length and blob size as unsigned ints, all
  in native byte order) followed by the module name and the blob. Every blob
  is appended to "<module>.<pid>.sancov".

  Args:
    path: name of the packed file.

  Raises:
    Exception: if the file ends in the middle of a record.
  """
  with open(path, mode="rb") as f:
    print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
    while True:
      header = f.read(12)
      if not header:
        # Clean end of file on a record boundary.
        return
      if len(header) < 12:
        raise Exception('Error reading file %s' % path)
      pid, module_length, blob_size = struct.unpack('iII', header)
      module = f.read(module_length)
      blob = f.read(blob_size)
      # Explicit check instead of assert so that truncated input is still
      # detected when Python runs with -O.
      if len(module) != module_length or len(blob) != blob_size:
        raise Exception('Error reading file %s' % path)
      extracted_file = "%s.%d.sancov" % (module, pid)
      print >> sys.stderr, "%s: extracting %s" % \
        (prog_name, extracted_file)
      # The packed file may contain multiple blobs for the same pid/module
      # pair. Append to the end of the file instead of overwriting.
      with open(extracted_file, 'ab') as f2:
        f2.write(blob)
127
128
def Unpack(files):
  """Unpack every packed coverage file in files, in the given order.

  Args:
    files: list of packed file names.
  """
  for packed_path in files:
    UnpackOneFile(packed_path)
132
def UnpackOneRawFile(path, map_path):
  """Convert one raw coverage dump into per-module .sancov files.

  The map file starts with a line holding the PC width (32 or 64). Every
  following line describes one mapping as "START END BASE MODULE_PATH" with
  the three addresses in hex. The raw file is a headerless sequence of
  absolute PCs of that width. Each non-zero PC is assigned to the mapping
  that contains it and written, relative to that mapping's base, to
  "<module_path>.<raw file name without the .raw suffix>".

  Args:
    path: name of the raw file; must end in '.sancov.raw'.
    map_path: name of the matching map file.

  Raises:
    Exception: if the map declares an unsupported PC width, or if output has
      to be written and path does not end in '.sancov.raw'.
  """
  mem_map = []
  with open(map_path, mode="rt") as f_map:
    print >> sys.stderr, "%s: reading map %s" % (prog_name, map_path)
    bits = int(f_map.readline())
    if bits != 32 and bits != 64:
      raise Exception('Wrong bits size in the map')
    for line in f_map:
      parts = line.rstrip().split()
      if not parts:
        # Tolerate blank lines (e.g. a trailing newline) in the map.
        continue
      mem_map.append((int(parts[0], 16),
                      int(parts[1], 16),
                      int(parts[2], 16),
                      ' '.join(parts[3:])))
  mem_map.sort(key=lambda m : m[0])
  mem_map_keys = [m[0] for m in mem_map]

  with open(path, mode="rb") as f:
    print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)

    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    # Floor division plus a repeat count: same layout as one format character
    # per PC, without depending on integer '/' semantics.
    fmt = '%d%s' % (size * 8 // bits, TypeCodeForStruct(bits))
    pcs = struct.unpack_from(fmt, f.read(size))
    mem_map_pcs = [[] for _ in mem_map]

    for pc in pcs:
      if pc == 0: continue
      # Index of the last mapping whose start is <= pc, or -1 if pc lies
      # below every mapping (or there are no mappings at all).
      map_idx = bisect.bisect(mem_map_keys, pc) - 1
      # Without the "< 0" test, index -1 would silently select the *last*
      # mapping and trip over its start address.
      if map_idx < 0 or pc >= mem_map[map_idx][1]:
        print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc)
        continue
      base = mem_map[map_idx][2]
      mem_map_pcs[map_idx].append(pc - base)

    for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs):
      if len(pc_list) == 0: continue
      # Explicit check instead of assert so it survives python -O.
      if not path.endswith('.sancov.raw'):
        raise Exception('Unexpected raw file name %s' % path)
      # Strip the trailing ".raw" from the raw file name.
      dst_path = module_path + '.' + os.path.basename(path)[:-4]
      print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path)
      sorted_pc_list = sorted(pc_list)
      pc_buffer = struct.pack('%d%s' % (len(sorted_pc_list), TypeCodeForStruct(bits)),
                              *sorted_pc_list)
      with open(dst_path, 'ab') as f2:
        # In append mode every write lands at the end of the file, so the
        # magic header may only be emitted when the file is still empty.
        # Otherwise a second header would end up in the middle of the PC data
        # (e.g. when one module has several mappings).
        f2.seek(0, 2)
        if f2.tell() == 0:
          array.array('I', MagicForBits(bits)).tofile(f2)
        f2.write(pc_buffer)
179
def RawUnpack(files):
  """Unpack every raw coverage file together with its map file.

  The map file name is derived from the raw file name by replacing the
  trailing "raw" with "map".

  Args:
    files: list of raw file names, each ending in '.sancov.raw'.

  Raises:
    Exception: if a file name does not end in '.sancov.raw'.
  """
  for raw_path in files:
    if not raw_path.endswith('.sancov.raw'):
      raise Exception('Unexpected raw file name %s' % raw_path)
    map_path = raw_path[:-3] + 'map'
    UnpackOneRawFile(raw_path, map_path)
186
def GetInstrumentedPCs(binary):
  """Return the set of instrumented PCs found in a binary.

  Runs "objdump -d" on the binary and collects every offset where
  __sanitizer_cov() or __sanitizer_cov_with_check() is called with call or
  callq, directly or via PLT.

  Args:
    binary: path of the binary to disassemble.

  Returns:
    A set of integers, one per matching call site (call address + 4).

  Raises:
    OSError: if objdump cannot be started.
  """
  import re  # local import: the file's import block does not provide it

  call_site = re.compile(
      r'^\s+([0-9a-f]+):.*\scallq?\s+[0-9a-f]+ '
      r'<__sanitizer_cov(?:_with_check)?(?:@plt)?>')
  # Pass the arguments as a list instead of building a shell pipeline, so
  # that paths containing spaces or shell metacharacters are handled safely.
  # universal_newlines makes stdout yield text lines.
  proc = subprocess.Popen(['objdump', '-d', binary], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, universal_newlines=True)
  # Give objdump an already-closed stdin so it cannot consume this script's
  # own standard input.
  proc.stdin.close()
  # The PCs we get from objdump are off by 4 bytes, as they point to the
  # beginning of the callq instruction. Empirically this is true on x86 and
  # x86_64.
  pcs = set()
  for line in proc.stdout:
    found = call_site.match(line)
    if found:
      pcs.add(int(found.group(1), 16) + 4)
  proc.stdout.close()
  # Reap the child so that no zombie process is left behind.
  proc.wait()
  return pcs
202
203def PrintMissing(binary):
204  if not os.path.isfile(binary):
205    raise Exception('File not found: %s' % binary)
206  instrumented = GetInstrumentedPCs(binary)
207  print >> sys.stderr, "%s: found %d instrumented PCs in %s" % (prog_name,
208                                                                len(instrumented),
209                                                                binary)
210  covered = set(int(line, 16) for line in sys.stdin)
211  print >> sys.stderr, "%s: read %d PCs from stdin" % (prog_name, len(covered))
212  missing = instrumented - covered
213  print >> sys.stderr, "%s: %d PCs missing from coverage" % (prog_name, len(missing))
214  if (len(missing) > len(instrumented) - len(covered)):
215    print >> sys.stderr, \
216        "%s: WARNING: stdin contains PCs not found in binary" % prog_name
217  for pc in sorted(missing):
218    print "0x%x" % pc
219
# Script entry point: sys.argv[1] is the command, the remaining arguments are
# either a single binary ("missing") or file name patterns (everything else).
if __name__ == '__main__':
  prog_name = sys.argv[0]
  if len(sys.argv) <= 2:
    Usage()

  # "missing" takes exactly one binary and reads the covered PCs from stdin.
  if sys.argv[1] == "missing":
    if len(sys.argv) != 3:
      Usage()
    PrintMissing(sys.argv[2])
    # sys.exit() instead of the bare exit(), which only exists when the
    # `site` module has been loaded.
    sys.exit(0)

  # Every other command works on a list of files; expand glob patterns here
  # so that the commands also work when the shell did not expand them.
  file_list = []
  for f in sys.argv[2:]:
    file_list += glob.glob(f)
  if not file_list:
    Usage()

  commands = {
    "print": PrintFiles,
    "merge": MergeAndPrint,
    "unpack": Unpack,
    "rawunpack": RawUnpack,
  }
  handler = commands.get(sys.argv[1])
  if handler is None:
    Usage()  # unknown command; Usage() does not return
  handler(file_list)
247