#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


Range = collections.namedtuple('Range', 'start, count')


def main():
    """Entry point: format the changed lines and print or apply the result."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    check_commit_count(commits, opts.diff)
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def check_commit_count(commits, diff_mode):
    """Die unless the number of `commits` is valid for the selected mode.

    At most two commits are accepted, and two commits are only meaningful when
    a diff is printed (`diff_mode`) because there is no working tree to apply
    the result to.  The upper bound is tested first so that it is reachable
    regardless of `diff_mode`."""
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) > 1 and not diff_mode:
        die('--diff is required when two commits are given')


def to_bytes(text):
    """Return `text` as a byte string, encoding native text as UTF-8.

    On Python 2 `str` already is `bytes`, so the value is returned as-is."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def convert_string(data):
    """Return `data` as the interpreter's native `str` type.

    Native strings are returned unchanged.  Anything else (in practice the
    `bytes` read from a subprocess pipe on Python 3) is decoded as UTF-8;
    undecodable bytes are replaced rather than raising, so that diagnostics
    from child processes can always be reported."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def split_config_entry(entry):
    """Split one record of `git config --list --null` into (name, value).

    Name and value are separated by the first newline.  A record without a
    newline is a key that was configured without a value; it is reported with
    the value 'true'."""
    if '\n' in entry:
        name, value = entry.split('\n', 1)
    else:
        name, value = entry, 'true'
    return name, value


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`, which
    is a dictionary mapping option name (in lower case) to either "--bool" or
    "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            name, value = split_config_entry(entry)
            if name in non_string_options:
                # Let git itself canonicalize typed values.
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used.

    Note that leading commits are popped off `args` in place."""
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines()."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a single
    one was specified, or the difference between both specified commits,
    filtered on `files` (if non-empty).  Zero context lines are used in the
    patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is any iterable of patch lines (native or byte strings).

    The input must have been produced with ``-U0``, meaning unidiff format with
    zero lines of context.  The return value is a dict mapping filename to a
    list of line `Range`s.  Hunks that add no lines are skipped."""
    matches = {}
    for line in patch_file:
        # Pipes yield bytes on Python 3; the patterns below are text patterns.
        line = convert_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys since entries are deleted in place.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            # Octal with a leading zero; oct() is spelled differently on
            # Python 2 ('0100644') and Python 3 ('0o100644').
            mode = '0%o' % os.stat(filename).st_mode
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # Records are NUL-terminated because of '-z'.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision:
        # Feed the blob from `revision` through a pipe; clang-format then
        # needs the filename only to pick the language and style.
        clang_format_cmd.extend(['-assume-filename=' + filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of the pipe end now owned by the child process.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        # Restore the caller's environment before removing the file.
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            sys.stderr.write('The following files would be modified but '
                             'have unstaged changes:\n')
            sys.stderr.write('%s\n' % unstaged_files)
            sys.stderr.write('Please commit, stage, or stash them first.\n')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented with
        # a message saying "Discard ... from worktree".  Instead, we use the
        # old tree as the index and checkout from new_tree, which gives the
        # slightly better message, "Apply ... to index and worktree".  This is
        # not quite right, since it won't be applied to the user's index, but
        # oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin   -- text fed to the command's standard input (default: empty)
      verbose -- if true (default), report stderr output and failures
      strip   -- if true (default), strip trailing newlines from the output

    Exits the program with status 2 if the command fails."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    # The pipes are binary; callers work with native strings.
    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                sys.stderr.write('`%s` printed to stderr:\n' % ' '.join(args))
            sys.stderr.write('%s\n' % stderr.rstrip())
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        sys.stderr.write('`%s` returned %s\n' % (' '.join(args), p.returncode))
    if stderr:
        sys.stderr.write('%s\n' % stderr.rstrip())
    sys.exit(2)


def die(message):
    """Print `message` as an error to stderr and exit with status 2."""
    sys.stderr.write('error: %s\n' % message)
    sys.exit(2)


if __name__ == '__main__':
    main()