1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""
13clang-format git integration
14============================
15
16This file provides a clang-format integration for git. Put it somewhere in your
17path and ensure that it is executable. Then, "git clang-format" will invoke
18clang-format on the changes in current files or a specific commit.
19
20For further details, run:
21git clang-format -h
22
23Requires Python 2.7
24"""
25
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis passed to argparse as the usage string.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description passed to argparse.  The git-config names listed at the end
# must match the keys that main() reads as option defaults.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at line number `start`.
Range = collections.namedtuple('Range', 'start, count')
58
59
60def main():
61  config = load_git_config()
62
63  # In order to keep '--' yet allow options after positionals, we need to
64  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
65  # nargs=argparse.REMAINDER disallows options after positionals.)
66  argv = sys.argv[1:]
67  try:
68    idx = argv.index('--')
69  except ValueError:
70    dash_dash = []
71  else:
72    dash_dash = argv[idx:]
73    argv = argv[:idx]
74
75  default_extensions = ','.join([
76      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
77      'c', 'h',  # C
78      'm',  # ObjC
79      'mm',  # ObjC++
80      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
81      # Other languages that clang-format supports
82      'proto', 'protodevel',  # Protocol Buffers
83      'java',  # Java
84      'js',  # JavaScript
85      'ts',  # TypeScript
86      ])
87
88  p = argparse.ArgumentParser(
89    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
90    description=desc)
91  p.add_argument('--binary',
92                 default=config.get('clangformat.binary', 'clang-format'),
93                 help='path to clang-format'),
94  p.add_argument('--commit',
95                 default=config.get('clangformat.commit', 'HEAD'),
96                 help='default commit to use if none is specified'),
97  p.add_argument('--diff', action='store_true',
98                 help='print a diff instead of applying the changes')
99  p.add_argument('--extensions',
100                 default=config.get('clangformat.extensions',
101                                    default_extensions),
102                 help=('comma-separated list of file extensions to format, '
103                       'excluding the period and case-insensitive')),
104  p.add_argument('-f', '--force', action='store_true',
105                 help='allow changes to unstaged files')
106  p.add_argument('-p', '--patch', action='store_true',
107                 help='select hunks interactively')
108  p.add_argument('-q', '--quiet', action='count', default=0,
109                 help='print less information')
110  p.add_argument('--style',
111                 default=config.get('clangformat.style', None),
112                 help='passed to clang-format'),
113  p.add_argument('-v', '--verbose', action='count', default=0,
114                 help='print extra information')
115  # We gather all the remaining positional arguments into 'args' since we need
116  # to use some heuristics to determine whether or not <commit> was present.
117  # However, to print pretty messages, we make use of metavar and help.
118  p.add_argument('args', nargs='*', metavar='<commit>',
119                 help='revision from which to compute the diff')
120  p.add_argument('ignored', nargs='*', metavar='<file>...',
121                 help='if specified, only consider differences in these files')
122  opts = p.parse_args(argv)
123
124  opts.verbose -= opts.quiet
125  del opts.quiet
126
127  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
128  if len(commits) > 1:
129    if not opts.diff:
130      die('--diff is required when two commits are given')
131  else:
132    if len(commits) > 2:
133      die('at most two commits allowed; %d given' % len(commits))
134  changed_lines = compute_diff_and_extract_lines(commits, files)
135  if opts.verbose >= 1:
136    ignored_files = set(changed_lines)
137  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
138  if opts.verbose >= 1:
139    ignored_files.difference_update(changed_lines)
140    if ignored_files:
141      print 'Ignoring changes in the following files (wrong extension):'
142      for filename in ignored_files:
143        print '   ', filename
144    if changed_lines:
145      print 'Running clang-format on the following files:'
146      for filename in changed_lines:
147        print '   ', filename
148  if not changed_lines:
149    print 'no modified files to format'
150    return
151  # The computed diff outputs absolute paths, so we must cd before accessing
152  # those files.
153  cd_to_toplevel()
154  if len(commits) > 1:
155    old_tree = commits[1]
156    new_tree = run_clang_format_and_save_to_tree(changed_lines,
157                                                 revision=commits[1],
158                                                 binary=opts.binary,
159                                                 style=opts.style)
160  else:
161    old_tree = create_tree_from_workdir(changed_lines)
162    new_tree = run_clang_format_and_save_to_tree(changed_lines,
163                                                 binary=opts.binary,
164                                                 style=opts.style)
165  if opts.verbose >= 1:
166    print 'old tree:', old_tree
167    print 'new tree:', new_tree
168  if old_tree == new_tree:
169    if opts.verbose >= 0:
170      print 'clang-format did not modify any files'
171  elif opts.diff:
172    print_diff(old_tree, new_tree)
173  else:
174    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
175                                  patch_mode=opts.patch)
176    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
177      print 'changed files:'
178      for filename in changed_files:
179        print '   ', filename
180
181
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless listed in `non_string_options`,
  which is a dictionary mapping option name (in lower case) to either "--bool"
  or "--int"; such options are re-read through `git config` with that flag.

  A variable that is set without any value (which git treats as boolean true)
  is reported with the value 'true'."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    # With --null each entry is "<name>\n<value>".  A variable without a value
    # has no "\n" at all, so unpacking a plain split() would raise ValueError.
    name, separator, value = entry.partition('\n')
    if not separator:
      value = 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
198
199
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  With a non-empty `dash_dash`, everything in `args` is taken to be a commit
  and is validated as such; the files are whatever followed the "--".
  Without it, leading arguments are consumed as commits for as long as they
  resolve to revisions, and the rest are files.  Whenever no commit is found,
  the result is a one-element list holding `default_commit`."""
  if dash_dash:
    commits = args if len(args) != 0 else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # No explicit separator: peel revisions off the front of `args`; what is
  # left over is the file list.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
236
237
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Exits the program if `value` resolves to a git object that is neither a
  commit nor a tag."""
  # `git rev-parse` fails with its own error message when `value` is
  # ambiguous (neither a commit nor a file), in which case run() exits.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
250
251
def get_object_type(value):
  """Return the type name `git cat-file -t` reports for `value`, or None when
  that command fails (i.e. `value` is not a valid git object)."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  out, _ = proc.communicate()
  return out.strip() if proc.returncode == 0 else None
261
262
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Exits with status 2 if the diff process reports failure."""
  proc = compute_diff(commits, files)
  result = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return result
273
274
def compute_diff(commits, files):
  """Start the git process that produces the diff for `commits`.

  The returned subprocess object's `stdout` yields a patch: against the
  working directory when a single commit is given (`git diff-index`), or
  between the commits when more than one is given (`git diff-tree`).  The
  patch is restricted to `files` if that list is non-empty and is generated
  with zero lines of context."""
  git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  cmd = ['git', git_tool, '-p', '-U0']
  cmd += commits
  cmd.append('--')
  cmd += files
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # The child gets no input; close our end of its stdin right away.
  proc.stdin.close()
  return proc
290
291
def extract_lines(patch_file):
  """Collect the changed line ranges from the patch in `patch_file`.

  `patch_file` is iterated line by line and must be a unified diff produced
  with ``-U0`` (zero lines of context).

  Returns a dict mapping each filename to a list of line `Range`s, one per
  hunk that leaves at least one line on the new side."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  matches = {}
  for line in patch_file:
    header = file_header.search(line)
    if header:
      # Following hunks belong to this file until the next "+++" header.
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(line)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    count_text = hunk.group(3)
    # A hunk header without ",<count>" covers exactly one line.
    line_count = int(count_text) if count_text else 1
    if line_count > 0:
      matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches
315
316
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; nothing is returned.  Keys without any
  '.' are always removed.  The extension is everything after the last '.' and
  is compared case-insensitively.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys: deleting entries while iterating the
  # dictionary itself (or a live view of its keys) raises RuntimeError.
  for filename in list(dictionary):
    _, dot, extension = filename.rpartition('.')
    if not dot or extension.lower() not in allowed_extensions:
      del dictionary[filename]
327
328
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current working
  directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
333
334
def create_tree_from_workdir(filenames):
  """Build a git tree out of the given working-directory files.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
340
341
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the line ranges to format.  When
  `revision` is given, both the file contents and the file mode are taken from
  that revision; otherwise they come from the working directory.  `binary` and
  `style` are passed through to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    """Return the octal mode string to record for `filename`."""
    if revision:
      # The working-directory copy may be missing or have a different mode, so
      # ask git for the mode stored in the revision.  `git ls-tree` prints
      # "<mode> SP <type> SP <object> TAB <path>".
      entry = run('git', 'ls-tree', revision, '--', filename)
      if not entry:
        die("'%s' does not exist in '%s'" % (filename, revision))
      return entry.split(None, 1)[0]
    return '%o' % os.stat(filename).st_mode

  def index_info_generator():
    """Yield one "<mode> <sha1>\\t<filename>" index entry per file."""
    for filename, line_ranges in changed_lines.items():
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')
356
357
def create_tree(input_lines, mode):
  """Write a tree object built from `input_lines` and return its ID.

  `mode` selects how `git update-index` reads its input.  With '--stdin',
  `input_lines` must be filenames.  With '--index-info', each item must be a
  line suitable for "git update-index --index-info", such as
  "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    feed = updater.stdin
    # -z makes update-index expect NUL-terminated records.
    for entry in input_lines:
      feed.write('%s\0' % entry)
    feed.close()
    exit_status = updater.wait()
    if exit_status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
376
377
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each becomes
  one `-lines=<first>:<last>` argument.  `binary` is the clang-format
  executable to run and `style`, if set, is passed as `-style=<style>`.

  Exits through die() if the executable cannot be found or if any of the
  processes in the pipeline fails.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # Convert each (start, count) pair into an inclusive first:last range.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # Pipe the blob stored in `revision` into clang-format's stdin and tell it
    # which filename to assume for that input.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format is given the filename directly; its stdin is a pipe that
    # is closed below without anything being written to it.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      # Swap the PIPE placeholder for the real file object so it can be closed.
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input (either the write end
  # of its stdin pipe or our copy of git_show's stdout).
  clang_format_stdin.close()
  # Store the formatted output as a blob: hash-object reads it from stdin and
  # -w writes it into the object database.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash_object now holds the read end; drop our copy of it.
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return stdout.rstrip('\r\n')
426
427
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index for the
  duration of the `with` body.

  The temporary index is created from `tree` (see create_temporary_index()).
  On exit the previous GIT_INDEX_FILE value is restored, or the variable is
  removed if it was not set before, and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved is not None:
      os.environ['GIT_INDEX_FILE'] = saved
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
443
444
def create_temporary_index(tree=None):
  """Create a temporary index file inside the git directory and return its
  path.

  The index is populated from `tree` when one is given; with `tree` left as
  None an empty index is written instead."""
  gitdir = run('git', 'rev-parse', '--git-dir')
  path = os.path.join(gitdir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+path, source)
  return path
456
457
def print_diff(old_tree, new_tree):
  """Show the user the differences between `old_tree` and `new_tree` on
  stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person is going to read the output, and only the porcelain command does
  # nice things like color and pagination.
  #
  # The filter limits the output to modified files: `new_tree` only contains
  # the files that were modified, so without it every unmodified file would
  # show up as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
469
470
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`.  If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively; otherwise the files are checked out from `new_tree`
  unconditionally.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                    '--name-only', old_tree, new_tree)
  # The output is NUL-separated; dropping empty fields also covers the case of
  # no modified files, where a plain split() would yield [''].
  changed_files = [name for name in diff_output.split('\0') if name]
  if not force and changed_files:
    # '--' keeps a filename that starts with '-' from being read as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print >>sys.stderr, ('The following files would be modified but '
                           'have unstaged changes:')
      print >>sys.stderr, unstaged_files
      print >>sys.stderr, 'Please commit, stage, or stash them first.'
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
501
502
def run(*args, **kwargs):
  """Run the command given by `args` and return what it wrote to stdout.

  Keyword arguments:
    stdin:   data sent to the command's standard input (default '').
    verbose: if true (the default), say which command is responsible before
             echoing its stderr or reporting its failure.
    strip:   if true (the default), remove trailing CR/LF from the result.

  Anything the command writes to stderr is echoed to our stderr.  If the
  command exits with a non-zero status, the program exits with status 2.
  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  out, err = proc.communicate(input=stdin)
  if proc.returncode != 0:
    if verbose:
      print >>sys.stderr, '`%s` returned %s' % (' '.join(args), proc.returncode)
    if err:
      print >>sys.stderr, err.rstrip()
    sys.exit(2)
  if err:
    if verbose:
      print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
    print >>sys.stderr, err.rstrip()
  return out.rstrip('\r\n') if strip else out
525
526
def die(message):
  """Print `message` to stderr, prefixed with "error:", and exit with
  status 2."""
  print >>sys.stderr, 'error:', message
  raise SystemExit(2)
530
531
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
534