#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wrapper script to help run clang tools across Chromium code.

How to use this tool:
If you want to run the tool across all Chromium code:
run_tool.py <tool> <path/to/compiledb>

If you want to include all files mentioned in the compilation database:
run_tool.py <tool> <path/to/compiledb> --all

If you only want to run the tool across just chrome/browser and content/browser:
run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see https://chromium.googlesource.com/chromium/src/+/master/docs/clang_tool_refactoring.md for more
information, which documents the entire automated refactoring flow in Chromium.

Why use this tool:
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.

Unfortunately, if the work is simply sharded across multiple cores by running
multiple RefactoringTools, problems arise when they attempt to rewrite a file at
the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:::<file path>:::<offset>:::<length>:::<replacement text>
r:::<file path>:::<offset>:::<length>:::<replacement text>
...etc...
==== END EDITS ====

Each edit occupies exactly one line; a NUL character in the replacement text is
converted to a newline when the edit is parsed.

Any generated edits are applied once the clang tool has finished running
across Chromium, regardless of whether some instances failed or not.
"""

import argparse
import collections
import functools
import multiprocessing
import os
import os.path
import subprocess
import sys

script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db

Edit = collections.namedtuple('Edit',
                              ('edit_type', 'offset', 'length', 'replacement'))


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    A list with one os.path.realpath()-normalized path per line of
    `git ls-files` output.
  """
  args = []
  if sys.platform == 'win32':
    args.append('git.bat')
  else:
    args.append('git')
  args.append('ls-files')
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  return [os.path.realpath(p) for p in output.splitlines()]


def _GetFilesFromCompileDB(build_directory):
  """Gets the list of files mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.

  Returns:
    A list of paths, each formed by joining an entry's 'directory' and 'file'
    fields. The paths are not normalized and may contain duplicates.
  """
  return [os.path.join(entry['directory'], entry['file'])
          for entry in compile_db.Read(build_directory)]


def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. The dictionary is
    empty if stdout does not contain both edit markers.
  """
  lines = stdout.splitlines()
  edits = collections.defaultdict(list)
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====', start_index + 1)
  except ValueError:
    # A missing marker must not raise: this runs inside a pool worker, and an
    # exception here would abort the whole run and discard the edits that were
    # already collected from every other file.
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      # Edits are one per line, so newlines in the replacement text arrive
      # encoded as NUL characters.
      replacement = replacement.replace('\0', '\n')
      # Normalize the file path emitted by the clang tool.
      path = os.path.realpath(os.path.join(build_directory, path))
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      # Raised for too few ':::'-separated fields or a non-integer
      # offset/length; report the line and keep going.
      print('Unable to parse edit: %s' % line)
  return edits


def _ExecuteTool(toolname, build_directory, filename):
  """Executes the tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated edits are stored with the key "edits"
    in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr" respectively.
  """
  command = subprocess.Popen(
      (toolname, '-p', build_directory, filename),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  stdout, stderr = command.communicate()
  if command.returncode != 0:
    return {'status': False, 'filename': filename, 'stderr': stderr}
  else:
    return {'status': True,
            'edits': _ExtractEditsFromStdout(build_directory, stdout)}


class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dict mapping filenames to the edits collected for them so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of tool invocations that exited with a non-zero status."""
    return self.__failed_count

  def Run(self):
    """Does the grunt work.

    Runs the tool over every file using a process pool and folds each result
    into this object's counters and edit map as it arrives.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory), self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
      pool.close()
    except BaseException:
      # Don't leave workers churning through the remaining files if result
      # processing failed or the run was interrupted.
      pool.terminate()
      raise
    finally:
      # Always reap the worker processes so none are leaked.
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
        self.__edit_count += len(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (float(self.__success_count + self.__failed_count) /
                  len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress update overwrite this line.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' %
                     (self.__success_count, self.__failed_count,
                      self.__edit_count, percentage))
    sys.stdout.flush()


def _ApplyEdits(edits):
  """Apply the generated edits.

  Each file is rewritten in place, and a summary line is printed at the end.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
  """
  edit_count = 0
  for k, v in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied.
    v.sort()
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(v):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))


_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None
  # Scan backwards to the first non-whitespace byte before the deletion,
  # counting every byte passed over (that byte included).
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  # Likewise scan forwards to the first non-whitespace byte after the deletion.
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      # A comma follows the deleted element: drop it and the whitespace
      # leading up to it.
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      # No comma follows: drop the preceding ',' or ':' and the whitespace
      # after it instead. A preceding '(' or '{' is left alone.
      del contents[offset - left_trim_count:offset]


def main():
  """Parses the command line, runs the tool in parallel and applies the edits.

  Returns:
    The negated number of files the tool failed on, for use as the exit status.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('tool', help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      'compile_database',
      help='path to the directory that contains the compile database')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  args = parser.parse_args()

  # Put the bundled LLVM build's bin directory first on PATH.
  os.environ['PATH'] = '%s%s%s' % (
      os.path.abspath(os.path.join(
          os.path.dirname(__file__),
          '../../../third_party/llvm-build/Release+Asserts/bin')),
      os.pathsep,
      os.environ['PATH'])

  if args.generate_compdb:
    compile_db.GenerateWithNinja(args.compile_database)

  if args.all:
    source_filenames = set(_GetFilesFromCompileDB(args.compile_database))
    # Edit paths are compared by realpath below, so the filter set has to be
    # normalized the same way; compile database paths are plain
    # directory + file joins. The tool is still run on the paths exactly as
    # the compile database spells them.
    filenames = {os.path.realpath(f) for f in source_filenames}
  else:
    filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(args.tool, args.compile_database,
                                   source_filenames)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v
               for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in filenames})
  # NOTE(review): process exit statuses are commonly truncated to 8 bits, so a
  # failure count that is a multiple of 256 would be reported as success.
  # Left unchanged to preserve the existing exit-status contract.
  return -dispatcher.failed_count


if __name__ == '__main__':
  sys.exit(main())