1#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Wrapper script to help run clang tools across Chromium code.
6
7How to use this tool:
8If you want to run the tool across all Chromium code:
9run_tool.py <tool> <path/to/compiledb>
10
11If you want to include all files mentioned in the compilation database:
12run_tool.py <tool> <path/to/compiledb> --all
13
14If you only want to run the tool across just chrome/browser and content/browser:
15run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
16
17Please see https://chromium.googlesource.com/chromium/src/+/master/docs/clang_tool_refactoring.md for more
18information, which documents the entire automated refactoring flow in Chromium.
19
20Why use this tool:
21The clang tool implementation doesn't take advantage of multiple cores, and if
22it fails mysteriously in the middle, all the generated replacements will be
23lost.
24
25Unfortunately, if the work is simply sharded across multiple cores by running
26multiple RefactoringTools, problems arise when they attempt to rewrite a file at
27the same time. To work around that, clang tools that are run using this tool
28should output edits to stdout in the following format:
29
30==== BEGIN EDITS ====
r:::<file path>:::<offset>:::<length>:::<replacement text>
r:::<file path>:::<offset>:::<length>:::<replacement text>
33...etc...
34==== END EDITS ====
35
36Any generated edits are applied once the clang tool has finished running
37across Chromium, regardless of whether some instances failed or not.
38"""
39
40import argparse
41import collections
42import functools
43import multiprocessing
44import os
45import os.path
46import subprocess
47import sys
48
# Locate this script on disk (resolving symlinks) and put the sibling
# '../pylib' directory at the front of the module search path, so the import
# below works regardless of the current working directory.
script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

# NOTE: must stay below the sys.path tweak above; presumably the 'clang'
# package lives under the pylib directory -- TODO confirm.
from clang import compile_db

# One edit emitted by a clang tool for a single file:
#   edit_type:   first field of the edit line, stored unchanged.
#   offset:      position in the file contents where the replaced range starts.
#   length:      size of the replaced range.
#   replacement: text that is written in place of the range.
Edit = collections.namedtuple('Edit',
                              ('edit_type', 'offset', 'length', 'replacement'))
57
58
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` (`git.bat` on Windows) in the current working directory
  and canonicalizes every reported path with os.path.realpath().

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    A list of canonical paths, one for every file reported by git. The exit
    status of git is not inspected, so the list is empty if git fails.
  """
  git = 'git.bat' if sys.platform == 'win32' else 'git'
  # -z: NUL-terminated output with file names printed verbatim. Without it git
  #     wraps "unusual" names in double quotes and escapes bytes in them, and
  #     such entries would never match a real path.
  # --: everything that follows is a path, even if it starts with a dash.
  args = [git, 'ls-files', '-z', '--']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  # The output ends with a terminator, so drop the resulting empty entry.
  return [os.path.realpath(p) for p in output.split(b'\0') if p]
76
77
def _GetFilesFromCompileDB(build_directory):
  """Gets the list of files mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.

  Returns:
    A list with one canonical path per entry of the compile database. Each
    entry's 'file' is resolved against its 'directory' and then passed through
    os.path.realpath(), so that the result is comparable with the paths
    returned by _GetFilesFromGit() and with the os.path.realpath() of the
    paths found in generated edits.
  """
  return [os.path.realpath(os.path.join(entry['directory'], entry['file']))
          for entry in compile_db.Read(build_directory)]
86
87
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  Only the lines between the first '==== BEGIN EDITS ====' line and the first
  '==== END EDITS ====' line are considered. Each of them is expected to look
  like:

    <edit type>:::<file path>:::<offset>:::<length>:::<replacement text>

  NUL characters in the replacement text stand for newlines. Lines that do not
  match this format are reported on stdout and skipped.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. The dictionary is
    empty if either of the two marker lines is missing.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====')
  except ValueError:
    # No (complete) edit section in the output. Treat it as "no edits" rather
    # than raising: an exception here would escape the worker process and
    # abort the whole run, losing the edits gathered from every other file.
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      offset = int(offset)
      length = int(length)
    except ValueError:
      # Wrong number of fields, or a non-numeric offset/length.
      print('Unable to parse edit: %s' % line)
      continue
    replacement = replacement.replace('\0', '\n')
    # Normalize the file path emitted by the clang tool.
    path = os.path.realpath(os.path.join(build_directory, path))
    edits[path].append(Edit(edit_type, offset, length, replacement))
  return edits
115
116
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the tool on a single file and packages the outcome.

  This lives at module level (rather than inside the dispatcher class) so that
  the multiprocessing module is able to pickle it.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean stored under the key "status".

    When the tool exits with status 0, "status" is True and the edits parsed
    from the tool's stdout are stored under the key "edits".

    Otherwise "status" is False, and the keys "filename" and "stderr" hold the
    processed file and the captured stderr of the tool respectively.
  """
  argv = (toolname, '-p', build_directory, filename)
  process = subprocess.Popen(
      argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = process.communicate()
  if process.returncode == 0:
    parsed_edits = _ExtractEditsFromStdout(build_directory, tool_stdout)
    return {'status': True, 'edits': parsed_edits}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
149
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported a failure so far."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over all the files using a pool of worker processes.

    Results are processed in whatever order the workers produce them. The
    pool is always shut down before returning, including when processing is
    interrupted by an exception.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory), self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Either every result has been consumed or an error is propagating; in
      # both cases the workers have nothing useful left to do, so stop them
      # and reap the processes instead of leaving them behind.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Updates the counters and the collected edits, reports failures, and
    refreshes the one-line progress indicator on stdout.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
        self.__edit_count += len(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (float(self.__success_count + self.__failed_count) /
                  len(self.__filenames)) * 100
    # The trailing carriage return makes the next update overwrite this line.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' %
                     (self.__success_count, self.__failed_count,
                      self.__edit_count, percentage))
    sys.stdout.flush()
210
211
def _ApplyEdits(edits):
  """Apply the generated edits.

  Every file is read completely, patched in memory and then rewritten in
  place. A summary line is printed once all files have been processed.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
      The lists are not modified.
  """
  edit_count = 0
  for filename, file_edits in edits.items():
    # Order the edits by position and iterate through them in reverse order.
    # Sorting allows duplicate edits to be quickly skipped, while reversing
    # means that subsequent edits don't need to have their offsets updated
    # with each edit applied. The key puts the offset first on purpose: the
    # natural tuple order of Edit starts with edit_type, which would break the
    # back-to-front order as soon as a file has edits of different types.
    ordered_edits = sorted(
        file_edits,
        key=lambda e: (e.offset, e.length, e.edit_type, e.replacement))
    last_edit = None
    with open(filename, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(ordered_edits):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # Pure deletion: also drop a now-dangling list separator, if any.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Rewrite the file from the start with the patched contents.
      f.seek(0)
      f.truncate()
      f.write(contents)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
240
241
242_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
243
244
245def _ExtendDeletionIfElementIsInList(contents, offset):
246  """Extends the range of a deletion if the deleted element was part of a list.
247
248  This rewriter helper makes it easy for refactoring tools to remove elements
249  from a list. Even if a matcher callback knows that it is removing an element
250  from a list, it may not have enough information to accurately remove the list
251  element; for example, another matcher callback may end up removing an adjacent
252  list element, or all the list elements may end up being removed.
253
254  With this helper, refactoring tools can simply remove the list element and not
255  worry about having to include the comma in the replacement.
256
257  Args:
258    contents: A bytearray with the deletion already applied.
259    offset: The offset in the bytearray where the deleted range used to be.
260  """
261  char_before = char_after = None
262  left_trim_count = 0
263  for byte in reversed(contents[:offset]):
264    left_trim_count += 1
265    if byte in _WHITESPACE_BYTES:
266      continue
267    if byte in (ord(','), ord(':'), ord('('), ord('{')):
268      char_before = chr(byte)
269    break
270
271  right_trim_count = 0
272  for byte in contents[offset:]:
273    right_trim_count += 1
274    if byte in _WHITESPACE_BYTES:
275      continue
276    if byte == ord(','):
277      char_after = chr(byte)
278    break
279
280  if char_before:
281    if char_after:
282      del contents[offset:offset + right_trim_count]
283    elif char_before in (',', ':'):
284      del contents[offset - left_trim_count:offset]
285
286
def main():
  """Parses the command line, runs the tool and applies the generated edits.

  Returns:
    The process exit status: 0 if the tool succeeded on every file, 1 if it
    failed on at least one of them.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('tool', help='clang tool to run')
  parser.add_argument(
      '--all',
      action='store_true',
      help='run the tool on every file mentioned in the compile database')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      'compile_database',
      help='path to the directory that contains the compile database')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  args = parser.parse_args()

  # Let the tool name be resolved against the in-tree LLVM build first.
  os.environ['PATH'] = '%s%s%s' % (
      os.path.abspath(os.path.join(
          os.path.dirname(__file__),
          '../../../third_party/llvm-build/Release+Asserts/bin')),
      os.pathsep,
      os.environ['PATH'])

  if args.generate_compdb:
    compile_db.GenerateWithNinja(args.compile_database)

  if args.all:
    filenames = set(_GetFilesFromCompileDB(args.compile_database))
    source_filenames = filenames
  else:
    filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(args.tool, args.compile_database,
                                   source_filenames)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v
               for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in filenames})
  # Do not return the (negated) failure count itself: exit statuses are
  # truncated to 8 bits on POSIX, so e.g. 256 failures would be reported as
  # success.
  return 1 if dispatcher.failed_count else 0
334
335
# Only run when executed as a script (not when imported); the value returned
# by main() becomes the exit status of the process.
if __name__ == '__main__':
  sys.exit(main())
338