1#!/usr/bin/env python
2# Copyright (c) 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6
7"""Parse an LLVM coverage report to generate useable results."""
8
9
10import argparse
11import json
12import os
13import re
14import subprocess
15import sys
16
17
18def _fix_filename(filename):
19  """Return a filename which we can use to identify the file.
20
21  The file paths printed by llvm-cov take the form:
22
23      /path/to/repo/out/dir/../../src/filename.cpp
24
25  And then they're truncated to 22 characters with leading ellipses:
26
27      ...../../src/filename.cpp
28
29  This makes it really tough to determine whether the file actually belongs in
30  the Skia repo.  This function strips out the leading junk so that, if the file
31  exists in the repo, the returned string matches the end of some relative path
32  in the repo. This doesn't guarantee correctness, but it's about as close as
33  we can get.
34  """
35  return filename.split('..')[-1].lstrip('./')
36
37
def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  Use suffix matching to determine which checked-in files the given filename
  matches. If there are no matches or multiple matches, return None.

  Args:
    filename: file path as it appears in the coverage report.
    all_files: iterable of checked-in file paths to match against.

  Returns:
    The single path from all_files which ends with the cleaned-up filename,
    or None if zero or several paths match. A warning listing the candidates
    is written to stderr in the multiple-match case.
  """
  new_file = _fix_filename(filename)
  if not new_file:
    # An empty suffix would "match" every checked-in file; treat as no match.
    return None
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  if len(matched) > 1:
    # sys.stderr.write() behaves the same on Python 2 and Python 3, unlike the
    # Python-2-only "print >> sys.stderr" statement.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None
55
56
def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  Args:
    report: full text of the coverage report.

  Returns:
    A dict mapping each checked-in file name (as listed by `git ls-files`) to
    a list of (lineno, coverage, code) tuples. coverage is the integer from
    the first column of the report line, or None if that column is blank.
    Files which cannot be matched to exactly one checked-in file are omitted.
  """
  # universal_newlines=True makes the output text on both Python 2 and 3, so
  # the paths can be compared against names parsed from the (text) report.
  all_files = subprocess.check_output(
      ['git', 'ls-files'], universal_newlines=True).splitlines()
  # A line of the form "<path>:" starts the section for a new file.
  header_re = re.compile(r'([a-zA-Z0-9\./_-]+):')
  # Separator lines ("  ----") and nested sub-view lines ("  |...") carry no
  # line data for the current file.
  skip_re = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')

  current_file = None
  file_lines = []
  files = {}
  not_checked_in = '%' # Use this as the file name for not-checked-in files.

  for line in report.splitlines():
    m = header_re.match(line)
    if m:
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      current_file = _file_in_repo(m.group(1), all_files) or not_checked_in
      file_lines = []
      continue

    # Ignore anything before the first file header and anything belonging to
    # a file we could not match; that data would be thrown away regardless.
    if current_file is None or current_file == not_checked_in:
      continue
    if not line or skip_re.match(line):
      continue

    cov, linenum, code = line.split('|', 2)
    cov = cov.strip()
    if cov:
      cov = int(cov)
    else:
      cov = None # We don't care about coverage for this line.
    linenum = int(linenum.strip())
    assert linenum == len(file_lines) + 1, (
        'Unexpected line number %d in %s' % (linenum, current_file))
    if isinstance(code, bytes):
      # Byte strings (Python 2 str) are decoded so the data is JSON-friendly.
      code = code.decode('utf-8', 'replace')
    file_lines.append((linenum, cov, code))

  # Record the final file; no further header follows it to trigger the store.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files
90
91
92
93def _testname(filename):
94  """Transform the file name into an ingestible test name."""
95  return re.sub(r'[^a-zA-Z0-9]', '_', filename)
96
97
def _nanobench_json(results, properties, key):
  """Format per-file results as a dict shaped like nanobench's JSON output.

  results is an iterable of (percent, lines_not_covered, filename) tuples.
  The returned dict contains everything from properties, plus 'key' and
  'results' entries.
  """
  # Start from the caller's properties; 'key' and 'results' are assigned
  # afterwards so that same-named entries in properties cannot clobber them.
  output = {}
  output.update(properties)
  output['key'] = key

  per_test = {}
  for percent, not_covered_lines, f in results:
    options = {
      'fullname': f,
      'dir': os.path.dirname(f),
      'source_type': 'coverage',
    }
    coverage = {
      'percent': percent,
      'lines_not_covered': not_covered_lines,
      'options': options,
    }
    per_test[_testname(f)] = {'coverage': coverage}

  output['results'] = per_test
  return output
120
121
122def _parse_key_value(kv_list):
123  """Return a dict whose key/value pairs are derived from the given list.
124
125  For example:
126
127      ['k1', 'v1', 'k2', 'v2']
128  becomes:
129
130      {'k1': 'v1',
131       'k2': 'v2'}
132  """
133  if len(kv_list) % 2 != 0:
134    raise Exception('Invalid key/value pairs: %s' % kv_list)
135
136  rv = {}
137  for i in xrange(len(kv_list) / 2):
138    rv[kv_list[i*2]] = kv_list[i*2+1]
139  return rv
140
141
142def _get_per_file_summaries(line_by_line):
143  """Summarize the full line-by-line coverage report by file."""
144  per_file = []
145  for filepath, lines in line_by_line.iteritems():
146    total_lines = 0
147    covered_lines = 0
148    for _, cov, _ in lines:
149      if cov is not None:
150        total_lines += 1
151        if cov > 0:
152          covered_lines += 1
153    if total_lines > 0:
154      per_file.append((float(covered_lines)/float(total_lines)*100.0,
155                       total_lines - covered_lines,
156                       filepath))
157  return per_file
158
159
def main():
  """Read a coverage report and write the requested JSON output files.

  --report names the input. --linebyline and --nanobench each name an
  optional output file; --nanobench additionally needs --key and
  --properties.
  """
  # Command-line interface.
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--report', required=True,
                          help='input file; an llvm coverage report.')
  arg_parser.add_argument('--nanobench',
                          help='output file for nanobench data.')
  arg_parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  arg_parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  arg_parser.add_argument('--linebyline',
                          help='output file for line-by-line JSON data.')
  args = arg_parser.parse_args()

  if args.nanobench and (not args.key or not args.properties):
    raise Exception('--key and --properties are required with --nanobench')

  # Load and parse the report.
  with open(args.report) as report_file:
    report = report_file.read()
  line_by_line = _get_per_file_per_line_coverage(report)

  # Optional output: the raw per-line data.
  if args.linebyline:
    with open(args.linebyline, 'w') as out_file:
      json.dump(line_by_line, out_file)

  # Optional output: per-file summaries in nanobench format.
  if args.nanobench:
    key = _parse_key_value(args.key)
    properties = _parse_key_value(args.properties)
    per_file = _get_per_file_summaries(line_by_line)
    format_results = _nanobench_json(per_file, properties, key)
    with open(args.nanobench, 'w') as out_file:
      json.dump(format_results, out_file)
201
202
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
205