1#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Applies edits generated by a clang tool that was run on Chromium code.
6
7Synopsis:
8
  cat run_tool.out | extract_edits.py | apply_edits.py -p <build dir> <filters...>
10
11For example - to apply edits only to WTF sources:
12
  ... | apply_edits.py -p out/gn third_party/WebKit/Source/wtf
14
15In addition to filters specified on the command line, the tool also skips edits
16that apply to files that are not covered by git.
17"""
18
19import argparse
20import collections
21import functools
22import multiprocessing
23import os
24import os.path
25import subprocess
26import sys
27
# Put ../pylib (resolved relative to this script's real, symlink-free
# location) at the front of sys.path. This has to run before the
# `from clang import ...` statement that follows; presumably the `clang`
# package lives in that directory -- confirm against the repository layout.
script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)
31
32from clang import compile_db
33
# One parsed edit: the edit type tag, the byte offset and length of the span
# to replace in the target file, and the replacement text for that span.
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
36
37
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` (`git.bat` on Windows) in the current working directory
  and normalizes every reported path with os.path.realpath().

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
      When empty or None, every file reported by git is returned.

  Returns:
    A list of text (not bytes) paths, one per line of git output. The list is
    empty when git produces no output, e.g. because it failed; in that case a
    warning is written to stderr.
  """
  git_binary = 'git.bat' if sys.platform == 'win32' else 'git'
  args = [git_binary, 'ls-files']
  if paths:
    # '--' keeps a filter that starts with '-' from being parsed as an option.
    args.append('--')
    args.extend(paths)
  # universal_newlines=True makes communicate() return text on both Python 2
  # and Python 3, so the result can be compared against the str paths used by
  # the rest of the script.
  command = subprocess.Popen(
      args, stdout=subprocess.PIPE, universal_newlines=True)
  output, _ = command.communicate()
  if command.returncode != 0:
    # Without this, a failing git silently yields an empty file list and every
    # edit ends up being filtered out with no explanation.
    sys.stderr.write(
        'git ls-files exited with status %d; the file list may be '
        'incomplete\n' % command.returncode)
  return [os.path.realpath(p) for p in output.splitlines()]
55
56
def _ParseEditsFromStdin(build_directory):
  """Reads edit records from stdin and groups them by target file.

  Each input line is expected to hold five fields separated by ':::':

    edit_type:::path:::offset:::length:::replacement

  NUL characters inside the replacement field stand for newlines. Lines that
  cannot be split into five fields, or whose offset/length are not integers,
  are reported on stderr and skipped. Edits whose file cannot be found are
  reported on stderr (once per distinct path) and dropped.

  Args:
    build_directory: Directory that contains the compile database. Paths that
      do not name an existing file as given are resolved relative to it.

  Returns:
    A dictionary (defaultdict of lists) mapping resolved filenames to the list
    of Edit tuples that apply to that file.
  """
  resolution_cache = {}

  def _ResolvePath(raw_path):
    # Memoized: each distinct path is looked up on disk (and, if missing,
    # reported) only once.
    try:
      return resolution_cache[raw_path]
    except KeyError:
      pass

    candidate = raw_path
    if not os.path.isfile(candidate):
      # Not usable as given; interpret it relative to the build directory.
      candidate = os.path.realpath(os.path.join(build_directory, raw_path))

    if not os.path.isfile(candidate):
      sys.stderr.write('Edit applies to a non-existent file: %s\n' % raw_path)
      candidate = None

    resolution_cache[raw_path] = candidate
    return candidate

  edits_by_file = collections.defaultdict(list)
  for raw_line in sys.stdin:
    record = raw_line.rstrip("\n\r")
    try:
      fields = record.split(':::', 4)
      edit_type, raw_path, offset, length, replacement = fields
      replacement = replacement.replace('\0', '\n')
      target = _ResolvePath(raw_path)
      if target:
        parsed = Edit(edit_type, int(offset), int(length), replacement)
        edits_by_file[target].append(parsed)
    except ValueError:
      # Raised by the five-way unpacking or by int() on a malformed field.
      sys.stderr.write('Unable to parse edit: %s\n' % record)
  return edits_by_file
99
100
101def _ApplyEditsToSingleFile(filename, edits):
102  # Sort the edits and iterate through them in reverse order. Sorting allows
103  # duplicate edits to be quickly skipped, while reversing means that
104  # subsequent edits don't need to have their offsets updated with each edit
105  # applied.
106  edit_count = 0
107  error_count = 0
108  edits.sort()
109  last_edit = None
110  with open(filename, 'rb+') as f:
111    contents = bytearray(f.read())
112    for edit in reversed(edits):
113      if edit == last_edit:
114        continue
115      if (last_edit is not None and edit.edit_type == last_edit.edit_type and
116          edit.offset == last_edit.offset and edit.length == last_edit.length):
117        sys.stderr.write(
118            'Conflicting edit: %s at offset %d, length %d: "%s" != "%s"\n' %
119            (filename, edit.offset, edit.length, edit.replacement,
120             last_edit.replacement))
121        error_count += 1
122        continue
123
124      last_edit = edit
125      contents[edit.offset:edit.offset + edit.length] = edit.replacement
126      if not edit.replacement:
127        _ExtendDeletionIfElementIsInList(contents, edit.offset)
128      edit_count += 1
129    f.seek(0)
130    f.truncate()
131    f.write(contents)
132  return (edit_count, error_count)
133
134
135def _ApplyEdits(edits):
136  """Apply the generated edits.
137
138  Args:
139    edits: A dict mapping filenames to Edit instances that apply to that file.
140  """
141  edit_count = 0
142  error_count = 0
143  done_files = 0
144  for k, v in edits.iteritems():
145    tmp_edit_count, tmp_error_count = _ApplyEditsToSingleFile(k, v)
146    edit_count += tmp_edit_count
147    error_count += tmp_error_count
148    done_files += 1
149    percentage = (float(done_files) / len(edits)) * 100
150    sys.stdout.write('Applied %d edits (%d errors) to %d files [%.2f%%]\r' %
151                     (edit_count, error_count, done_files, percentage))
152
153  sys.stdout.write('\n')
154  return -error_count
155
156
157_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
158
159
160def _ExtendDeletionIfElementIsInList(contents, offset):
161  """Extends the range of a deletion if the deleted element was part of a list.
162
163  This rewriter helper makes it easy for refactoring tools to remove elements
164  from a list. Even if a matcher callback knows that it is removing an element
165  from a list, it may not have enough information to accurately remove the list
166  element; for example, another matcher callback may end up removing an adjacent
167  list element, or all the list elements may end up being removed.
168
169  With this helper, refactoring tools can simply remove the list element and not
170  worry about having to include the comma in the replacement.
171
172  Args:
173    contents: A bytearray with the deletion already applied.
174    offset: The offset in the bytearray where the deleted range used to be.
175  """
176  char_before = char_after = None
177  left_trim_count = 0
178  for byte in reversed(contents[:offset]):
179    left_trim_count += 1
180    if byte in _WHITESPACE_BYTES:
181      continue
182    if byte in (ord(','), ord(':'), ord('('), ord('{')):
183      char_before = chr(byte)
184    break
185
186  right_trim_count = 0
187  for byte in contents[offset:]:
188    right_trim_count += 1
189    if byte in _WHITESPACE_BYTES:
190      continue
191    if byte == ord(','):
192      char_after = chr(byte)
193    break
194
195  if char_before:
196    if char_after:
197      del contents[offset:offset + right_trim_count]
198    elif char_before in (',', ':'):
199      del contents[offset - left_trim_count:offset]
200
201
def main():
  """Parses the command line, reads edits from stdin and applies them.

  Only edits whose target file is reported by _GetFilesFromGit (restricted to
  the optional path filters) are applied; all other edits are dropped.

  Returns:
    The value returned by _ApplyEdits: 0 when every edit applied cleanly,
    otherwise the negated number of conflicting edits.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-p',
      required=True,
      help='path to the build dir (dir that edit paths are relative to)')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  args = parser.parse_args()

  filenames = set(_GetFilesFromGit(args.path_filter))
  edits = _ParseEditsFromStdin(args.p)
  # items() rather than the Python 2-only iteritems(), so that this also runs
  # on Python 3.
  return _ApplyEdits(
      {k: v for k, v in edits.items()
            if os.path.realpath(k) in filenames})
219
220
# Script entry point: the value returned by main() is handed to sys.exit()
# and so becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main())
223