1#!/usr/bin/env python
2# Copyright 2016 the V8 project authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Script for merging sancov files in parallel.
7
8When merging test runner output, the sancov files are expected
9to be located in one directory with the file-name pattern:
10<executable name>.test.<id>.<attempt>.sancov
11
12For each executable, this script writes a new file:
13<executable name>.result.sancov
14
15When --swarming-output-dir is specified, this script will merge the result
16files found there into the coverage folder.
17
18The sancov tool is expected to be in the llvm compiler-rt third-party
19directory. It's not checked out by default and must be added as a custom deps:
20'v8/third_party/llvm/projects/compiler-rt':
21    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
22"""
23
import argparse
import logging
import math
import os
import re
import shutil
import subprocess
import sys

from multiprocessing import Pool, cpu_count
33
34
# Show INFO-level messages (the merge steps report their progress that way).
logging.basicConfig(level=logging.INFO)

# V8 checkout directory: three directory levels above this script.
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.dirname(os.path.abspath(__file__))))

# Location of the sancov tool inside the checkout.
SANCOV_TOOL = os.path.join(BASE_DIR, *[
    'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py',
])

# Number of cpus; used as the size of the multiprocessing pool.
CPUS = cpu_count()

# Matches sancov files as written by the v8 test runner
# (<executable name>.test.<id>.<attempt>.sancov). Group 1 captures the
# executable name.
SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.\d+\.sancov$')

# Matches sancov result files as returned from swarming
# (<executable name>.result.sancov).
SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')
55
56
def merge(args):
  """Merge several sancov files into one.

  Called through multiprocessing pool. The args are expected to unpack to:
    keep: Option if source and intermediate sancov files should be kept.
    coverage_dir: Folder where to find the sancov files.
    executable: Name of the executable whose sancov files should be merged.
    index: A number to be put into the intermediate result file name.
           If None, this is a final result.
    bucket: The list of sancov files to be merged.
  Returns: A tuple with the executable name and the result file name.
  Raises:
    RuntimeError: If the sancov tool exits with a non-zero return code. The
                  message carries the tool's stderr output.
  """
  keep, coverage_dir, executable, index, bucket = args
  # File names in the bucket are relative to coverage_dir, hence the cwd.
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge'] + bucket,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=coverage_dir,
  )
  output, errors = process.communicate()
  # Explicit check instead of an assert: asserts are stripped under -O, which
  # would let the output of a failed merge be written as a result file.
  if process.returncode != 0:
    raise RuntimeError(
        'Merging sancov files for %s failed with exit code %d: %s' %
        (executable, process.returncode,
         errors.decode('utf-8', 'replace')))
  if index is not None:
    # This is an intermediate result, add the bucket index to the file name.
    result_file_name = '%s.result.%d.sancov' % (executable, index)
  else:
    # This is the final result without bucket index.
    result_file_name = '%s.result.sancov' % executable
  # The merged data is whatever the tool wrote to stdout.
  with open(os.path.join(coverage_dir, result_file_name), "wb") as result_file:
    result_file.write(output)
  if not keep:
    # The inputs are no longer needed once the merged file has been written.
    for source_name in bucket:
      os.remove(os.path.join(coverage_dir, source_name))
  return executable, result_file_name
90
91
def generate_inputs(keep, coverage_dir, file_map, cpus):
  """Generate inputs for multiprocessed merging.

  Splits the sancov files into several buckets, so that each bucket can be
  merged in a separate process. We have only few executables in total with
  mostly lots of associated files. In the general case, with many executables
  we might need to avoid splitting buckets of executables with few files.

  Args:
    keep: Option if source and intermediate sancov files should be kept.
          Passed through unchanged into every generated tuple.
    coverage_dir: Folder where to find the sancov files. Passed through
                  unchanged into every generated tuple.
    file_map: Dict mapping executable names to their lists of sancov files.
    cpus: Number of cpus used to determine the bucket size.
  Returns: List of args as expected by merge above, i.e. tuples of
           (keep, coverage_dir, executable, bucket index, list of files).
  """
  inputs = []
  # items()/range() instead of iteritems()/xrange() so that this runs on both
  # Python 2 and Python 3.
  for executable, files in file_map.items():
    # What's the bucket size for distributing files for merging? E.g. with
    # 2 cpus and 9 files we want bucket size 5. The size is never below 2.
    n = max(2, int(math.ceil(len(files) / float(cpus))))

    # Chop files into buckets; the last one may be shorter than n.
    buckets = [files[i:i + n] for i in range(0, len(files), n)]

    # Inputs for multiprocessing. List of tuples containing:
    # Keep-files option, base path, executable name, index of bucket,
    # list of files.
    inputs.extend((keep, coverage_dir, executable, i, b)
                  for i, b in enumerate(buckets))
  return inputs
117
118
def merge_parallel(inputs, merge_fun=merge):
  """Run merge_fun over all inputs using a pool of CPUS worker processes.

  Returns: The list of results of merge_fun, one per element of inputs.
  """
  workers = Pool(CPUS)
  try:
    results = workers.map(merge_fun, inputs)
  finally:
    # Always stop accepting new work, also when a job raised.
    workers.close()
  return results
126
127
def merge_test_runner_output(options):
  """Merge the sancov files written by the test runner, per executable.

  Works in two parallel passes over options.coverage_dir: first the files of
  each executable are merged bucket-wise into intermediate result files, then
  the intermediate results of each executable are merged into one final
  <executable name>.result.sancov file.

  Args:
    options: Parsed command-line options; coverage_dir and keep are read.
  """
  # Map executable names to their respective sancov files.
  file_map = {}
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      file_map.setdefault(match.group(1), []).append(f)

  inputs = generate_inputs(
      options.keep, options.coverage_dir, file_map, CPUS)

  logging.info('Executing %d merge jobs in parallel for %d executables.',
               len(inputs), len(file_map))

  results = merge_parallel(inputs)

  # Map executable names to intermediate bucket result files.
  file_map = {}
  for executable, f in results:
    file_map.setdefault(executable, []).append(f)

  # Merge the bucket results for each executable.
  # The final result has index None, so no index will appear in the
  # file name.
  # items() instead of iteritems() so that this runs on Python 2 and 3.
  inputs = [(options.keep, options.coverage_dir, executable, None, files)
            for executable, files in file_map.items()]

  logging.info('Merging %d intermediate results.', len(inputs))

  merge_parallel(inputs)
158
159
def merge_two(args):
  """Merge two sancov files.

  Called through multiprocessing pool. The args are expected to unpack to:
    swarming_output_dir: Folder where to find the new file.
    coverage_dir: Folder where to find the existing file.
    f: File name of the file to be merged.
  The merged data replaces the existing file in coverage_dir.
  Raises:
    RuntimeError: If the sancov tool exits with a non-zero return code. The
                  message carries the tool's stderr output.
  """
  swarming_output_dir, coverage_dir, f = args
  input_file = os.path.join(swarming_output_dir, f)
  output_file = os.path.join(coverage_dir, f)
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge', input_file, output_file],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  output, errors = process.communicate()
  # Explicit check instead of an assert: asserts are stripped under -O, which
  # would let a failed merge overwrite the existing coverage file.
  if process.returncode != 0:
    raise RuntimeError(
        'Merging sancov file %s failed with exit code %d: %s' %
        (f, process.returncode, errors.decode('utf-8', 'replace')))
  # The tool has exited at this point, so overwriting one of its inputs with
  # the merged data is safe.
  with open(output_file, "wb") as merged_file:
    merged_file.write(output)
180
181
def merge_swarming_output(options):
  """Merge sancov result files from a swarming shard into the coverage dir.

  Result files whose name does not yet exist in options.coverage_dir are
  moved there. Files that already exist there are merged with the new data
  in parallel.

  Args:
    options: Parsed command-line options; swarming_output_dir and
             coverage_dir are read.
  """
  # Iterate sancov files from swarming.
  files = []
  for f in os.listdir(options.swarming_output_dir):
    if not SANCOV_RESULTS_FILE_RE.match(f):
      continue
    source = os.path.join(options.swarming_output_dir, f)
    target = os.path.join(options.coverage_dir, f)
    if os.path.exists(target):
      # If the same file already exists, we'll merge the data.
      files.append(f)
    else:
      # No file yet? Just move it. shutil.move also works when the two
      # folders are on different file systems, where os.rename fails.
      shutil.move(source, target)

  inputs = [(options.swarming_output_dir, options.coverage_dir, f)
            for f in files]

  logging.info('Executing %d merge jobs in parallel.', len(inputs))
  merge_parallel(inputs, merge_two)
201
202
def main():
  """Parse the command line and run the requested merge.

  With --swarming-output-dir, the result files found there are merged into
  the coverage folder. Otherwise the test runner output in the coverage
  folder is merged.

  Returns: 0 on success. Invalid folders are reported through the argument
           parser, which prints a usage message and exits with status 2.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir', required=True,
                      help='Path to the sancov output files.')
  parser.add_argument('--keep', default=False, action='store_true',
                      help='Keep sancov output files after merging.')
  parser.add_argument('--swarming-output-dir',
                      help='Folder containing a results shard from swarming.')
  options = parser.parse_args()

  # Check if folder with coverage output exists. Reported via the parser
  # rather than an assert, since asserts are stripped under -O.
  if not os.path.isdir(options.coverage_dir):
    parser.error('--coverage-dir is not an existing folder: %s' %
                 options.coverage_dir)

  if options.swarming_output_dir:
    # Check if folder with swarming output exists.
    if not os.path.isdir(options.swarming_output_dir):
      parser.error('--swarming-output-dir is not an existing folder: %s' %
                   options.swarming_output_dir)
    merge_swarming_output(options)
  else:
    merge_test_runner_output(options)

  return 0


if __name__ == '__main__':
  sys.exit(main())
230