1#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Wrapper script to help run clang tools across Chromium code.
6
How to use run_tool.py:
If you want to run a clang tool across all Chromium code:
run_tool.py --tool <tool> -p <path/to/compiledb>

If you want to include all files mentioned in the compilation database
(this will also include generated files, unlike the previous command):
run_tool.py --tool <tool> -p <path/to/compiledb> --all

If you want to run the clang tool across only chrome/browser and
content/browser:
run_tool.py --tool <tool> -p <path/to/compiledb> chrome/browser content/browser
18
19Please see docs/clang_tool_refactoring.md for more information, which documents
20the entire automated refactoring flow in Chromium.
21
22Why use run_tool.py (instead of running a clang tool directly):
23The clang tool implementation doesn't take advantage of multiple cores, and if
24it fails mysteriously in the middle, all the generated replacements will be
25lost. Additionally, if the work is simply sharded across multiple cores by
26running multiple RefactoringTools, problems arise when they attempt to rewrite a
27file at the same time.
28
29run_tool.py will
301) run multiple instances of clang tool in parallel
312) gather stdout from clang tool invocations
323) "atomically" forward #2 to stdout
33
34Output of run_tool.py can be piped into extract_edits.py and then into
35apply_edits.py. These tools will extract individual edits and apply them to the
36source files. These tools assume the clang tool emits the edits in the
37following format:
38    ...
39    ==== BEGIN EDITS ====
40    r:::<file path>:::<offset>:::<length>:::<replacement text>
41    r:::<file path>:::<offset>:::<length>:::<replacement text>
42    ...etc...
43    ==== END EDITS ====
44    ...
45
46extract_edits.py extracts only lines between BEGIN/END EDITS markers
47apply_edits.py reads edit lines from stdin and applies the edits
48"""
49
import argparse
import bisect
from collections import namedtuple
import functools
import json
import multiprocessing
import os
import os.path
import re
import shlex
import subprocess
import sys
61
# Put the sibling "pylib" directory (one level above this script's real
# location) at the front of the module search path, so that the project-local
# modules imported below can be found.
script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, os.pardir, 'pylib'))
sys.path.insert(0, tool_dir)
65
66from clang import compile_db
67
68
# One compile database record: the directory the command runs in, the source
# file it compiles, and the full compile command line. Being a tuple, it can be
# stored in a set and sorted, which main() relies on.
CompDBEntry = namedtuple('CompDBEntry', 'directory filename command')
70
def _PruneGitFiles(git_files, paths):
  """Prunes the list of files from git to include only those that are either in
  |paths| or start with one item in |paths|.

  Matching is a plain string-prefix test against each entry of |paths|.

  Args:
    git_files: List of all repository files. Sorted in place as a side effect.
    paths: Prefix filter for the returned paths. May contain multiple entries,
        and the contents should be absolute paths.

  Returns:
    Pruned list of files, in sorted order. Each matching file appears once even
    if several entries of |paths| are prefixes of it.
  """
  if not git_files:
    return []
  git_files.sort()
  pruned_list = []
  git_index = 0
  for path in sorted(paths):
    # Both sequences are sorted, so any not-yet-collected match for |path| lies
    # at or after the position where the previous prefix stopped. bisect_left
    # returns the first index in that range whose entry is >= |path|, which is
    # where a run of entries sharing the prefix must begin.
    least = bisect.bisect_left(git_files, path, git_index)
    while least < len(git_files) and git_files[least].startswith(path):
      pruned_list.append(git_files[least])
      least += 1
    git_index = least

  return pruned_list
106
107
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository if |paths| includes prefix
  path filters or is empty. All complete filenames in |paths| are also included
  in the output.

  Each entry of |paths| that names an existing file is returned as-is (after
  resolving it with os.path.realpath). Every other entry is treated as a prefix
  filter applied to the output of "git ls-files", which is run in the current
  working directory.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        None is treated like an empty list, i.e. no filtering.

  Returns:
    List of real paths: the explicitly named files first, followed by the
    (optionally pruned) files reported by git.
  """
  partial_paths = []
  files = []
  # |paths| defaults to None, which cannot be iterated; treat it as "no filter".
  for p in paths or []:
    real_path = os.path.realpath(p)
    if os.path.isfile(real_path):
      files.append(real_path)
    else:
      partial_paths.append(real_path)
  # git is only consulted when there is a prefix to expand or when nothing was
  # named explicitly.
  if partial_paths or not files:
    git = 'git.bat' if sys.platform == 'win32' else 'git'
    # universal_newlines=True makes communicate() return text rather than bytes
    # on Python 3, so the paths can be compared with the str prefixes above.
    command = subprocess.Popen(
        [git, 'ls-files'], stdout=subprocess.PIPE, universal_newlines=True)
    output, _ = command.communicate()
    git_files = [os.path.realpath(p) for p in output.splitlines()]
    if partial_paths:
      git_files = _PruneGitFiles(git_files, partial_paths)
    files.extend(git_files)
  return files
138
139
def _GetEntriesFromCompileDB(build_directory, source_filenames):
  """Reads the compilation database and returns its entries as CompDBEntry.

  Args:
    build_directory: Directory that contains the compile database.
    source_filenames: If not None, only entries whose file (joined onto the
      entry's directory and resolved with os.path.realpath) appears in this
      list are returned. If None, every entry is returned.

  Returns:
    List of CompDBEntry tuples, in the order the database yields them.
  """
  wanted = None
  if source_filenames is not None:
    wanted = set(source_filenames)

  entries = []
  for record in compile_db.Read(build_directory):
    if wanted is not None:
      resolved = os.path.realpath(
          os.path.join(record['directory'], record['file']))
      if resolved not in wanted:
        continue
    entries.append(
        CompDBEntry(record['directory'], record['file'], record['command']))
  return entries
156
157
def _UpdateCompileCommandsIfNeeded(compile_commands, files_list):
  """Optionally filters the compile database, then post-processes it.

  The filtering step only happens on Windows and only when |files_list| is
  non-empty; in every other case all entries are kept. The result is always
  passed through compile_db.ProcessCompileDatabaseIfNeeded.

  Args:
    compile_commands: List of the contents of compile database.
    files_list: List of required files for processing. Can be None to specify
      no filtering.

  Returns:
    List of the contents of the compile database after processing.
  """
  if sys.platform != 'win32' or not files_list:
    return compile_db.ProcessCompileDatabaseIfNeeded(compile_commands)

  # Compare paths relative to the current directory on both sides.
  wanted = set(os.path.relpath(name) for name in files_list)
  kept = [
      entry for entry in compile_commands
      if os.path.relpath(
          os.path.join(entry['directory'], entry['file'])) in wanted
  ]
  return compile_db.ProcessCompileDatabaseIfNeeded(kept)
181
182
def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
  """Executes the clang tool on a single compile database entry.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Name of the clang tool to execute.
    tool_args: Arguments to be passed to the clang tool. Can be None.
    build_directory: Directory that contains the compile database. The tool is
      run with this as its working directory.
    compdb_entry: The file and args to run the clang tool over.

  Returns:
    A dictionary with the following keys:
      "status": True if the tool exited with code 0, False otherwise.
      "filename": The filename taken from |compdb_entry|.
      "stderr_text": The tool's stderr output, with "'linker' input unused"
        warnings removed.
      "stdout_text": The tool's stdout output. Only present when "status" is
        True.
  """

  args = [toolname, compdb_entry.filename]
  if tool_args:
    args.extend(tool_args)

  args.append('--')
  compile_args = shlex.split(compdb_entry.command,
                             posix=(sys.platform != 'win32'))
  # 'command' contains the full command line, including the input source file
  # itself. It is filtered out, otherwise it would be passed to the tool twice -
  # once directly and once via the compile args.
  # /showIncludes is used by Ninja to track header file dependencies on
  # Windows. That is not needed here, and it results in lots of spam and a
  # massive log file, so it is stripped as well.
  args.extend(
      a for a in compile_args
      if a != compdb_entry.filename and a != '/showIncludes')

  # shlex.split escapes double quotes in non-Posix mode, so we need to strip
  # them back.
  if sys.platform == 'win32':
    args = [a.replace('\\"', '"') for a in args]
  # universal_newlines=True makes communicate() return text instead of bytes on
  # Python 3, which the str regex below and the callers' sys.stdout.write()
  # require.
  command = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
      cwd=build_directory, universal_newlines=True)
  stdout_text, stderr_text = command.communicate()
  stderr_text = re.sub(
      r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
      "", stderr_text, flags=re.MULTILINE)

  result = {
      'status': command.returncode == 0,
      'filename': compdb_entry.filename,
      'stderr_text': stderr_text,
  }
  if result['status']:
    result['stdout_text'] = stdout_text
  return result
249
250
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  Tool stdout is forwarded to this process's stdout one result at a time, and
  diagnostics plus progress information go to stderr.
  """

  def __init__(self, toolname, tool_args, build_directory, compdb_entries):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      tool_args: Arguments to be passed to the tool. Can be None.
      build_directory: Directory that contains the compile database.
      compdb_entries: The files and args to run the tool over. Must support
        len() and iteration.
    """
    self.__toolname = toolname
    self.__tool_args = tool_args
    self.__build_directory = build_directory
    self.__compdb_entries = compdb_entries
    self.__success_count = 0
    self.__failed_count = 0

  @property
  def failed_count(self):
    """Number of entries for which the tool reported failure so far."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every entry in a process pool and reports results.

    Results are handled in completion order, not in input order.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname, self.__tool_args,
                            self.__build_directory),
          self.__compdb_entries)
      for result in result_iterator:
        self.__ProcessResult(result)
      pool.close()
    except BaseException:
      # Do not leave worker processes running if result processing is
      # interrupted (e.g. by Ctrl-C or a write error).
      pool.terminate()
      raise
    finally:
      pool.join()
    sys.stderr.write('\n')

  def __ProcessResult(self, result):
    """Handles result processing.

    Forwards the tool output, updates the success/failure counters and
    periodically writes a progress line to stderr.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      sys.stdout.write(result['stdout_text'])
      sys.stderr.write(result['stderr_text'])
    else:
      self.__failed_count += 1
      sys.stderr.write('\nFailed to process %s\n' % result['filename'])
      sys.stderr.write(result['stderr_text'])
      sys.stderr.write('\n')
    done_count = self.__success_count + self.__failed_count
    total_count = len(self.__compdb_entries)
    percentage = (float(done_count) / total_count) * 100
    # Only output progress for every 100th entry, to make log files easier to
    # inspect.
    if done_count % 100 == 0 or done_count == total_count:
      sys.stderr.write(
          'Processed %d files with %s tool (%d failures) [%.2f%%]\r' %
          (done_count, self.__toolname, self.__failed_count, percentage))
308
309
def main():
  """Parses the command line and runs the requested clang tool.

  Options may come from the command line or, when --options-file is given,
  from that file (split on whitespace). The set of files to process is either
  every compile database entry (--all) or the C/C++/Obj-C/Obj-C++ files known
  to git that match the optional path filters, optionally restricted to one
  shard.

  Returns:
    0 if the tool succeeded on every entry; otherwise the negated number of
    failed entries, clamped to -255.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  if args.options_file:
    with open(args.options_file) as options_file:
      argv = options_file.read().split()

  parser.add_argument('--tool', required=True, help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      '--shard',
      metavar='<n>-of-<count>')
  parser.add_argument(
      '-p',
      required=True,
      help='path to the directory that contains the compile database')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  parser.add_argument(
      '--tool-arg', nargs='?', action='append',
      help='optional arguments passed to the tool')
  parser.add_argument(
      '--tool-path', nargs='?',
      help='optional path to the tool directory')
  args = parser.parse_args(argv)

  # Validate --shard up front, so that a malformed value is reported as a usage
  # error before any expensive work is done.
  shard = None
  if args.shard:
    match = re.match(r'(\d+)-of-(\d+)$', args.shard)
    if not match:
      parser.error(
          '--shard must have the form <n>-of-<count>, got %r' % args.shard)
    shard = (int(match.group(1)), int(match.group(2)))
    if not 1 <= shard[0] <= shard[1]:
      parser.error(
          '--shard requires 1 <= n <= count, got %r' % args.shard)

  if args.tool_path:
    tool_path = os.path.abspath(args.tool_path)
  else:
    tool_path = os.path.abspath(os.path.join(
          os.path.dirname(__file__),
          '../../../third_party/llvm-build/Release+Asserts/bin'))

  if args.all:
    # Reading source files is postponed to after possible regeneration of
    # compile_commands.json.
    source_filenames = None
  else:
    git_filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in git_filenames
                        if os.path.splitext(f)[1] in extensions]

  if args.generate_compdb:
    compile_commands = compile_db.GenerateWithNinja(args.p)
    compile_commands = _UpdateCompileCommandsIfNeeded(
        compile_commands, source_filenames)
    with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f:
      f.write(json.dumps(compile_commands, indent=2))

  compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames))

  if shard is not None:
    shard_index, shard_count = shard
    total_length = len(compdb_entries)
    # Input is 1-based, but modular arithmetic is 0-based. The entries are
    # sorted first so that every shard sees the same ordering.
    compdb_entries = [
        f for i, f in enumerate(sorted(compdb_entries))
        if i % shard_count == shard_index - 1
    ]
    # Report the shard with the same 1-based numbering the user passed in.
    print('Shard %d-of-%d will process %d entries out of %d' % (
        shard_index, shard_count, len(compdb_entries), total_length))

  dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool),
                                   args.tool_arg,
                                   args.p,
                                   compdb_entries)
  dispatcher.Run()
  # The exit status is truncated to 8 bits on POSIX, so an unclamped failure
  # count that is a multiple of 256 would be reported as success.
  return -min(dispatcher.failed_count, 255)
391
392
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)
395