#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wrapper script to help run clang tools across Chromium code.

How to use run_tool.py:
If you want to run a clang tool across all Chromium code:
run_tool.py <tool> <path/to/compiledb>

If you want to include all files mentioned in the compilation database
(this will also include generated files, unlike the previous command):
run_tool.py <tool> <path/to/compiledb> --all

If you want to run the clang tool across only chrome/browser and
content/browser:
run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see docs/clang_tool_refactoring.md for more information, which documents
the entire automated refactoring flow in Chromium.

Why use run_tool.py (instead of running a clang tool directly):
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost. Additionally, if the work is simply sharded across multiple cores by
running multiple RefactoringTools, problems arise when they attempt to rewrite
a file at the same time.

run_tool.py will
1) run multiple instances of clang tool in parallel
2) gather stdout from clang tool invocations
3) "atomically" forward #2 to stdout

Output of run_tool.py can be piped into extract_edits.py and then into
apply_edits.py. These tools will extract individual edits and apply them to the
source files. These tools assume the clang tool emits the edits in the
following format:
    ...
    ==== BEGIN EDITS ====
    r:::<file path>:::<offset>:::<length>:::<replacement text>
    r:::<file path>:::<offset>:::<length>:::<replacement text>
    ...etc...
    ==== END EDITS ====
    ...

extract_edits.py extracts only lines between BEGIN/END EDITS markers
apply_edits.py reads edit lines from stdin and applies the edits
"""

import argparse
from collections import namedtuple
import functools
import json
import multiprocessing
import os
import os.path
import re
import shlex
import subprocess
import sys

script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db


# One compilation database entry: working directory, source filename, and the
# full compiler command line used to build that file.
CompDBEntry = namedtuple('CompDBEntry', ['directory', 'filename', 'command'])


def _PruneGitFiles(git_files, paths):
  """Prunes the list of files from git to include only those that are either in
  |paths| or start with one item in |paths|.

  Args:
    git_files: List of all repository files.
    paths: Prefix filter for the returned paths. May contain multiple entries,
        and the contents should be absolute paths.

  Returns:
    Pruned list of files.
  """
  if not git_files:
    return []
  git_files.sort()
  pruned_list = []
  git_index = 0
  for path in sorted(paths):
    # Binary-search for |path| in the sorted file list, then sweep forward
    # collecting every file that has |path| as a prefix. Since both lists are
    # sorted, the search for the next path can resume at |git_index|.
    least = git_index
    most = len(git_files) - 1
    while least <= most:
      # Floor division keeps |middle| an int under Python 3 as well; plain
      # '/' would produce a float index and raise TypeError.
      middle = (least + most) // 2
      if git_files[middle] == path:
        least = middle
        break
      elif git_files[middle] > path:
        most = middle - 1
      else:
        least = middle + 1
    while least < len(git_files) and git_files[least].startswith(path):
      pruned_list.append(git_files[least])
      least += 1
    git_index = least

  return pruned_list


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository if |paths| includes prefix
  path filters or is empty. All complete filenames in |paths| are also included
  in the output.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    List of absolute file paths.
  """
  partial_paths = []
  files = []
  for p in paths:
    real_path = os.path.realpath(p)
    if os.path.isfile(real_path):
      files.append(real_path)
    else:
      partial_paths.append(real_path)
  # Only consult git when a prefix filter remains or when no complete
  # filenames were given (the "run over everything" case).
  if partial_paths or not files:
    args = []
    if sys.platform == 'win32':
      # On Windows the depot_tools wrapper is a batch file.
      args.append('git.bat')
    else:
      args.append('git')
    args.append('ls-files')
    command = subprocess.Popen(args, stdout=subprocess.PIPE)
    output, _ = command.communicate()
    git_files = [os.path.realpath(p) for p in output.splitlines()]
    if partial_paths:
      git_files = _PruneGitFiles(git_files, partial_paths)
    files.extend(git_files)
  return files


def _GetEntriesFromCompileDB(build_directory, source_filenames):
  """Gets the list of files and args mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.
    source_filenames: If not None, only include entries for the given list of
        filenames.

  Returns:
    List of CompDBEntry tuples.
  """
  filenames_set = None if source_filenames is None else set(source_filenames)
  return [
      CompDBEntry(entry['directory'], entry['file'], entry['command'])
      for entry in compile_db.Read(build_directory)
      if filenames_set is None or os.path.realpath(
          os.path.join(entry['directory'], entry['file'])) in filenames_set
  ]


def _UpdateCompileCommandsIfNeeded(compile_commands, files_list):
  """Filters compile database to only include required files, and makes it
  more clang-tool friendly on Windows.

  Args:
    compile_commands: List of the contents of compile database.
    files_list: List of required files for processing. Can be None to specify
        no filtering.

  Returns:
    List of the contents of the compile database after processing.
  """
  if sys.platform == 'win32' and files_list:
    relative_paths = set([os.path.relpath(f) for f in files_list])
    filtered_compile_commands = []
    for entry in compile_commands:
      file_path = os.path.relpath(
          os.path.join(entry['directory'], entry['file']))
      if file_path in relative_paths:
        filtered_compile_commands.append(entry)
  else:
    filtered_compile_commands = compile_commands

  return compile_db.ProcessCompileDatabaseIfNeeded(filtered_compile_commands)


def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
  """Executes the clang tool.

  This is defined outside the class so it can be pickled for the
  multiprocessing module.

  Args:
    toolname: Name of the clang tool to execute.
    tool_args: Arguments to be passed to the clang tool. Can be None.
    build_directory: Directory that contains the compile database.
    compdb_entry: The file and args to run the clang tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated output is stored with the key
    "stdout_text" in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr_text" respectively.
  """
  args = [toolname, compdb_entry.filename]
  if tool_args:
    args.extend(tool_args)

  args.append('--')
  args.extend([
      a for a in shlex.split(compdb_entry.command,
                             posix=(sys.platform != 'win32'))
      # 'command' contains the full command line, including the input
      # source file itself. We need to filter it out otherwise it's
      # passed to the tool twice - once directly and once via
      # the compile args.
      if a != compdb_entry.filename
      # /showIncludes is used by Ninja to track header file dependencies on
      # Windows. We don't need to do this here, and it results in lots of spam
      # and a massive log file, so we strip it.
      and a != '/showIncludes'
  ])

  # shlex.split escapes double quotes in non-Posix mode, so we need to strip
  # them back.
  if sys.platform == 'win32':
    args = [a.replace('\\"', '"') for a in args]
  command = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
      cwd=build_directory)
  stdout_text, stderr_text = command.communicate()
  # The tool passes the full compile command after '--', so clang warns about
  # the unused linker input; that warning is pure noise here.
  stderr_text = re.sub(
      r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
      "", stderr_text, flags=re.MULTILINE)

  if command.returncode != 0:
    return {
        'status': False,
        'filename': compdb_entry.filename,
        'stderr_text': stderr_text,
    }
  else:
    return {
        'status': True,
        'filename': compdb_entry.filename,
        'stdout_text': stdout_text,
        'stderr_text': stderr_text,
    }


class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, tool_args, build_directory, compdb_entries):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      tool_args: Arguments to be passed to the tool. Can be None.
      build_directory: Directory that contains the compile database.
      compdb_entries: The files and args to run the tool over.
    """
    self.__toolname = toolname
    self.__tool_args = tool_args
    self.__build_directory = build_directory
    self.__compdb_entries = compdb_entries
    self.__success_count = 0
    self.__failed_count = 0

  @property
  def failed_count(self):
    return self.__failed_count

  def Run(self):
    """Does the grunt work."""
    pool = multiprocessing.Pool()
    # imap_unordered forwards each result as soon as any worker finishes,
    # which lets us stream tool output instead of waiting for all workers.
    result_iterator = pool.imap_unordered(
        functools.partial(_ExecuteTool, self.__toolname, self.__tool_args,
                          self.__build_directory),
        self.__compdb_entries)
    for result in result_iterator:
      self.__ProcessResult(result)
    sys.stderr.write('\n')

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      sys.stdout.write(result['stdout_text'])
      sys.stderr.write(result['stderr_text'])
    else:
      self.__failed_count += 1
      sys.stderr.write('\nFailed to process %s\n' % result['filename'])
      sys.stderr.write(result['stderr_text'])
      sys.stderr.write('\n')
    done_count = self.__success_count + self.__failed_count
    percentage = (float(done_count) / len(self.__compdb_entries)) * 100
    # Only output progress for every 100th entry, to make log files easier to
    # inspect.
    if done_count % 100 == 0 or done_count == len(self.__compdb_entries):
      sys.stderr.write(
          'Processed %d files with %s tool (%d failures) [%.2f%%]\r' %
          (done_count, self.__toolname, self.__failed_count, percentage))


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  if args.options_file:
    # Replace the command line entirely with the contents of the options file.
    argv = open(args.options_file).read().split()

  parser.add_argument('--tool', required=True, help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      '--shard',
      metavar='<n>-of-<count>')
  parser.add_argument(
      '-p',
      required=True,
      help='path to the directory that contains the compile database')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  parser.add_argument(
      '--tool-arg', nargs='?', action='append',
      help='optional arguments passed to the tool')
  parser.add_argument(
      '--tool-path', nargs='?',
      help='optional path to the tool directory')
  args = parser.parse_args(argv)

  if args.tool_path:
    tool_path = os.path.abspath(args.tool_path)
  else:
    tool_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        '../../../third_party/llvm-build/Release+Asserts/bin'))

  if args.all:
    # Reading source files is postponed to after possible regeneration of
    # compile_commands.json.
    source_filenames = None
  else:
    git_filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in git_filenames
                        if os.path.splitext(f)[1] in extensions]

  if args.generate_compdb:
    compile_commands = compile_db.GenerateWithNinja(args.p)
    compile_commands = _UpdateCompileCommandsIfNeeded(
        compile_commands, source_filenames)
    with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f:
      f.write(json.dumps(compile_commands, indent=2))

  compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames))

  if args.shard:
    total_length = len(compdb_entries)
    match = re.match(r'(\d+)-of-(\d+)$', args.shard)
    if not match:
      # Previously a malformed value crashed with an AttributeError; fail
      # with a proper usage message instead.
      parser.error('--shard must be of the form <n>-of-<count>')
    # Input is 1-based, but modular arithmetic is 0-based.
    shard_number = int(match.group(1)) - 1
    shard_count = int(match.group(2))
    compdb_entries = [
        f for i, f in enumerate(sorted(compdb_entries))
        if i % shard_count == shard_number
    ]
    # Report the 1-based shard number the user passed in (the old code
    # printed the 0-based index, e.g. "Shard 0-of-4" for --shard=1-of-4).
    print('Shard %d-of-%d will process %d entries out of %d' % (
        shard_number + 1, shard_count, len(compdb_entries), total_length))

  dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool),
                                   args.tool_arg,
                                   args.p,
                                   compdb_entries)
  dispatcher.Run()
  # Exit code mirrors the number of failures (negated by convention here).
  return -dispatcher.failed_count


if __name__ == '__main__':
  sys.exit(main())