1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
15
16[for a description of what an unwrapped line is, see unwrapped_line.py]
17
18This is a pytree visitor that goes over a parse tree and produces a list of
19UnwrappedLine containers from it, each with its own depth and containing all
20the tokens that could fit on the line if there were no maximal line-length
21limitations.
22
23Note: a precondition to running this visitor and obtaining correct results is
24for the tree to have its comments spliced in as nodes. Prefixes are ignored.
25
26For most uses, the convenience function UnwrapPyTree should be sufficient.
27"""
28
29# The word "token" is overloaded within this module, so for clarity rename
30# the imported pgen2.token module.
31from lib2to3 import pytree
32from lib2to3.pgen2 import token as grammar_token
33
34from yapf.yapflib import pytree_utils
35from yapf.yapflib import pytree_visitor
36from yapf.yapflib import split_penalty
37from yapf.yapflib import style
38from yapf.yapflib import unwrapped_line
39
40
41def UnwrapPyTree(tree):
42  """Create and return a list of unwrapped lines from the given pytree.
43
44  Arguments:
45    tree: the top-level pytree node to unwrap.
46
47  Returns:
48    A list of UnwrappedLine objects.
49  """
50  unwrapper = PyTreeUnwrapper()
51  unwrapper.Visit(tree)
52  uwlines = unwrapper.GetUnwrappedLines()
53  uwlines.sort(key=lambda x: x.lineno)
54  return uwlines
55
56
57# Grammar tokens considered as whitespace for the purpose of unwrapping.
58_WHITESPACE_TOKENS = frozenset([
59    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
60    grammar_token.ENDMARKER
61])
62
63
64class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
65  """PyTreeUnwrapper - see file-level docstring for detailed description.
66
67  Note: since this implements PyTreeVisitor and node names in lib2to3 are
68  underscore_separated, the visiting methods of this class are named as
69  Visit_node_name. invalid-name pragmas are added to each such method to silence
70  a style warning. This is forced on us by the usage of lib2to3, and re-munging
71  method names to make them different from actual node names sounded like a
72  confusing and brittle affair that wasn't worth it for this small & controlled
73  deviation from the style guide.
74
75  To understand the connection between visitor methods in this class, some
76  familiarity with the Python grammar is required.
77  """
78
79  def __init__(self):
80    # A list of all unwrapped lines finished visiting so far.
81    self._unwrapped_lines = []
82
83    # Builds up a "current" unwrapped line while visiting pytree nodes. Some
84    # nodes will finish a line and start a new one.
85    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
86
87    # Current indentation depth.
88    self._cur_depth = 0
89
90  def GetUnwrappedLines(self):
91    """Fetch the result of the tree walk.
92
93    Note: only call this after visiting the whole tree.
94
95    Returns:
96      A list of UnwrappedLine objects.
97    """
98    # Make sure the last line that was being populated is flushed.
99    self._StartNewLine()
100    return self._unwrapped_lines
101
102  def _StartNewLine(self):
103    """Finish current line and start a new one.
104
105    Place the currently accumulated line into the _unwrapped_lines list and
106    start a new one.
107    """
108    if self._cur_unwrapped_line.tokens:
109      self._unwrapped_lines.append(self._cur_unwrapped_line)
110      _MatchBrackets(self._cur_unwrapped_line)
111      _AdjustSplitPenalty(self._cur_unwrapped_line)
112    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
113
114  _STMT_TYPES = frozenset({
115      'if_stmt',
116      'while_stmt',
117      'for_stmt',
118      'try_stmt',
119      'expect_clause',
120      'with_stmt',
121      'funcdef',
122      'classdef',
123  })
124
125  # pylint: disable=invalid-name,missing-docstring
126  def Visit_simple_stmt(self, node):
127    # A 'simple_stmt' conveniently represents a non-compound Python statement,
128    # i.e. a statement that does not contain other statements.
129
130    # When compound nodes have a single statement as their suite, the parser
131    # can leave it in the tree directly without creating a suite. But we have
132    # to increase depth in these cases as well. However, don't increase the
133    # depth of we have a simple_stmt that's a comment node. This represents a
134    # standalone comment and in the case of it coming directly after the
135    # funcdef, it is a "top" comment for the whole function.
136    # TODO(eliben): add more relevant compound statements here.
137    single_stmt_suite = (
138        node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
139    is_comment_stmt = pytree_utils.IsCommentStatement(node)
140    if single_stmt_suite and not is_comment_stmt:
141      self._cur_depth += 1
142    self._StartNewLine()
143    self.DefaultNodeVisit(node)
144    if single_stmt_suite and not is_comment_stmt:
145      self._cur_depth -= 1
146
147  def _VisitCompoundStatement(self, node, substatement_names):
148    """Helper for visiting compound statements.
149
150    Python compound statements serve as containers for other statements. Thus,
151    when we encounter a new compound statement we start a new unwrapped line.
152
153    Arguments:
154      node: the node to visit.
155      substatement_names: set of node names. A compound statement will be
156        recognized as a NAME node with a name in this set.
157    """
158    for child in node.children:
159      # A pytree is structured in such a way that a single 'if_stmt' node will
160      # contain all the 'if', 'elif' and 'else' nodes as children (similar
161      # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
162      # we visit all children here and create a new line before the requested
163      # set of nodes.
164      if (child.type == grammar_token.NAME and
165          child.value in substatement_names):
166        self._StartNewLine()
167      self.Visit(child)
168
169  _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
170
171  def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
172    self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
173
174  _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
175
176  def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
177    self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
178
179  _FOR_STMT_ELEMS = frozenset({'for', 'else'})
180
181  def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
182    self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
183
184  _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
185
186  def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
187    self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
188
189  _EXCEPT_STMT_ELEMS = frozenset({'except'})
190
191  def Visit_except_clause(self, node):  # pylint: disable=invalid-name
192    self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
193
194  _FUNC_DEF_ELEMS = frozenset({'def'})
195
196  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
197    self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
198
199  def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
200    self._StartNewLine()
201    index = 0
202    for child in node.children:
203      index += 1
204      self.Visit(child)
205      if pytree_utils.NodeName(child) == 'ASYNC':
206        break
207    for child in node.children[index].children:
208      self.Visit(child)
209
210  _CLASS_DEF_ELEMS = frozenset({'class'})
211
212  def Visit_classdef(self, node):  # pylint: disable=invalid-name
213    self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
214
215  def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
216    self._StartNewLine()
217    index = 0
218    for child in node.children:
219      index += 1
220      self.Visit(child)
221      if pytree_utils.NodeName(child) == 'ASYNC':
222        break
223    for child in node.children[index].children:
224      self.Visit(child)
225
226  def Visit_decorator(self, node):  # pylint: disable=invalid-name
227    for child in node.children:
228      self.Visit(child)
229      if (pytree_utils.NodeName(child) == 'COMMENT' and
230          child == node.children[0]):
231        self._StartNewLine()
232
233  def Visit_decorators(self, node):  # pylint: disable=invalid-name
234    for child in node.children:
235      self._StartNewLine()
236      self.Visit(child)
237
238  def Visit_decorated(self, node):  # pylint: disable=invalid-name
239    for child in node.children:
240      self._StartNewLine()
241      self.Visit(child)
242
243  _WITH_STMT_ELEMS = frozenset({'with'})
244
245  def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
246    self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
247
248  def Visit_suite(self, node):  # pylint: disable=invalid-name
249    # A 'suite' starts a new indentation level in Python.
250    self._cur_depth += 1
251    self._StartNewLine()
252    self.DefaultNodeVisit(node)
253    self._cur_depth -= 1
254
255  def Visit_listmaker(self, node):  # pylint: disable=invalid-name
256    _DetermineMustSplitAnnotation(node)
257    self.DefaultNodeVisit(node)
258
259  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
260    _DetermineMustSplitAnnotation(node)
261    self.DefaultNodeVisit(node)
262
263  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
264    if node.prev_sibling.value == '(':
265      _DetermineMustSplitAnnotation(node)
266    self.DefaultNodeVisit(node)
267
268  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
269    _DetermineMustSplitAnnotation(node)
270    self.DefaultNodeVisit(node)
271
272  def Visit_arglist(self, node):  # pylint: disable=invalid-name
273    _DetermineMustSplitAnnotation(node)
274    self.DefaultNodeVisit(node)
275
276  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
277    _DetermineMustSplitAnnotation(node)
278    self.DefaultNodeVisit(node)
279
280  def DefaultLeafVisit(self, leaf):
281    """Default visitor for tree leaves.
282
283    A tree leaf is always just gets appended to the current unwrapped line.
284
285    Arguments:
286      leaf: the leaf to visit.
287    """
288    if leaf.type in _WHITESPACE_TOKENS:
289      self._StartNewLine()
290    elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
291      # Add non-whitespace tokens and comments that aren't empty.
292      self._cur_unwrapped_line.AppendNode(leaf)
293
294
295_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
296
297
298def _MatchBrackets(uwline):
299  """Visit the node and match the brackets.
300
301  For every open bracket ('[', '{', or '('), find the associated closing bracket
302  and "match" them up. I.e., save in the token a pointer to its associated open
303  or close bracket.
304
305  Arguments:
306    uwline: (UnwrappedLine) An unwrapped line.
307  """
308  bracket_stack = []
309  for token in uwline.tokens:
310    if token.value in pytree_utils.OPENING_BRACKETS:
311      bracket_stack.append(token)
312    elif token.value in pytree_utils.CLOSING_BRACKETS:
313      bracket_stack[-1].matching_bracket = token
314      token.matching_bracket = bracket_stack[-1]
315      bracket_stack.pop()
316
317    for bracket in bracket_stack:
318      if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node):
319        bracket.container_elements.append(token)
320        token.container_opening = bracket
321
322
323def _AdjustSplitPenalty(uwline):
324  """Visit the node and adjust the split penalties if needed.
325
326  A token shouldn't be split if it's not within a bracket pair. Mark any token
327  that's not within a bracket pair as "unbreakable".
328
329  Arguments:
330    uwline: (UnwrappedLine) An unwrapped line.
331  """
332  bracket_level = 0
333  for index, token in enumerate(uwline.tokens):
334    if index and not bracket_level:
335      pytree_utils.SetNodeAnnotation(token.node,
336                                     pytree_utils.Annotation.SPLIT_PENALTY,
337                                     split_penalty.UNBREAKABLE)
338    if token.value in pytree_utils.OPENING_BRACKETS:
339      bracket_level += 1
340    elif token.value in pytree_utils.CLOSING_BRACKETS:
341      bracket_level -= 1
342
343
344def _DetermineMustSplitAnnotation(node):
345  """Enforce a split in the list if the list ends with a comma."""
346  if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'):
347    return
348  if not _ContainsComments(node):
349    token = next(node.parent.leaves())
350    if token.value == '(':
351      if sum(1 for ch in node.children
352             if pytree_utils.NodeName(ch) == 'COMMA') < 2:
353        return
354    if (not isinstance(node.children[-1], pytree.Leaf) or
355        node.children[-1].value != ','):
356      return
357  num_children = len(node.children)
358  index = 0
359  _SetMustSplitOnFirstLeaf(node.children[0])
360  while index < num_children - 1:
361    child = node.children[index]
362    if isinstance(child, pytree.Leaf) and child.value == ',':
363      next_child = node.children[index + 1]
364      if next_child.type == grammar_token.COMMENT:
365        index += 1
366        if index >= num_children - 1:
367          break
368      _SetMustSplitOnFirstLeaf(node.children[index + 1])
369    index += 1
370
371
372def _ContainsComments(node):
373  """Return True if the list has a comment in it."""
374  if isinstance(node, pytree.Leaf):
375    return node.type == grammar_token.COMMENT
376  for child in node.children:
377    if _ContainsComments(child):
378      return True
379  return False
380
381
382def _SetMustSplitOnFirstLeaf(node):
383  """Set the "must split" annotation on the first leaf node."""
384  pytree_utils.SetNodeAnnotation(
385      pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT,
386      True)
387