1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""pytree-related utilities.
15
16This module collects various utilities related to the parse trees produced by
17the lib2to3 library.
18
19  NodeName(): produces a string name for pytree nodes.
20  ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21                     a given string with code to a pytree.
22  InsertNodeBefore(): insert a node before another in a pytree.
23  InsertNodeAfter(): insert a node after another in a pytree.
24  {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
25"""
26
27import ast
28
29from lib2to3 import pygram
30from lib2to3 import pytree
31from lib2to3.pgen2 import driver
32from lib2to3.pgen2 import parse
33from lib2to3.pgen2 import token
34
35# TODO(eliben): We may want to get rid of this filtering at some point once we
36# have a better understanding of what information we need from the tree. Then,
37# these tokens may be filtered out from the tree before the tree gets to the
38# unwrapper.
39NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
40
41OPENING_BRACKETS = frozenset({'(', '[', '{'})
42CLOSING_BRACKETS = frozenset({')', ']', '}'})
43
44
45class Annotation(object):
46  """Annotation names associated with pytrees."""
47  CHILD_INDENT = 'child_indent'
48  NEWLINES = 'newlines'
49  MUST_SPLIT = 'must_split'
50  SPLIT_PENALTY = 'split_penalty'
51  SUBTYPE = 'subtype'
52
53
54def NodeName(node):
55  """Produce a string name for a given node.
56
57  For a Leaf this is the token name, and for a Node this is the type.
58
59  Arguments:
60    node: a tree node
61
62  Returns:
63    Name as a string.
64  """
65  # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
66  if node.type < 256:
67    return token.tok_name[node.type]
68  else:
69    return pygram.python_grammar.number2symbol[node.type]
70
71
72def FirstLeafNode(node):
73  if isinstance(node, pytree.Leaf):
74    return node
75  return FirstLeafNode(node.children[0])
76
77
78def LastLeafNode(node):
79  if isinstance(node, pytree.Leaf):
80    return node
81  return LastLeafNode(node.children[-1])
82
83
84# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
85# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
86# context where a keyword is disallowed).
87# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
88# monkey-patching.
89_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
90del _GRAMMAR_FOR_PY3.keywords['exec']
91
92_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
93del _GRAMMAR_FOR_PY2.keywords['nonlocal']
94
95
96def ParseCodeToTree(code):
97  """Parse the given code to a lib2to3 pytree.
98
99  Arguments:
100    code: a string with the code to parse.
101
102  Raises:
103    SyntaxError if the code is invalid syntax.
104    parse.ParseError if some other parsing failure.
105
106  Returns:
107    The root node of the parsed tree.
108  """
109  # This function is tiny, but the incantation for invoking the parser correctly
110  # is sufficiently magical to be worth abstracting away.
111  try:
112    # Try to parse using a Python 3 grammar, which is more permissive (print and
113    # exec are not keywords).
114    parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
115    tree = parser_driver.parse_string(code, debug=False)
116  except parse.ParseError:
117    # Now try to parse using a Python 2 grammar; If this fails, then
118    # there's something else wrong with the code.
119    try:
120      parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
121      tree = parser_driver.parse_string(code, debug=False)
122    except parse.ParseError:
123      # Raise a syntax error if the code is invalid python syntax.
124      try:
125        ast.parse(code)
126      except SyntaxError as e:
127        raise e
128      else:
129        raise
130  return _WrapEndMarker(tree)
131
132
133def _WrapEndMarker(tree):
134  """Wrap a single ENDMARKER token in a "file_input" node.
135
136  Arguments:
137    tree: (pytree.Node) The root node of the parsed tree.
138
139  Returns:
140    The root node of the parsed tree. If the tree is a single ENDMARKER node,
141    then that node is wrapped in a "file_input" node. That will ensure we don't
142    skip comments attached to that node.
143  """
144  if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
145    return pytree.Node(pygram.python_symbols.file_input, [tree])
146  return tree
147
148
149def InsertNodesBefore(new_nodes, target):
150  """Insert new_nodes before the given target location in the tree.
151
152  Arguments:
153    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
154      tree).
155    target: the target node before which the new node node will be inserted.
156
157  Raises:
158    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
159  """
160  for node in new_nodes:
161    _InsertNodeAt(node, target, after=False)
162
163
164def InsertNodesAfter(new_nodes, target):
165  """Insert new_nodes after the given target location in the tree.
166
167  Arguments:
168    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
169      tree).
170    target: the target node after which the new node node will be inserted.
171
172  Raises:
173    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
174  """
175  for node in reversed(new_nodes):
176    _InsertNodeAt(node, target, after=True)
177
178
179def _InsertNodeAt(new_node, target, after=False):
180  """Underlying implementation for node insertion.
181
182  Arguments:
183    new_node: a new node to insert (this node should not be in the tree).
184    target: the target node.
185    after: if True, new_node is inserted after target. Otherwise, it's inserted
186      before target.
187
188  Returns:
189    nothing
190
191  Raises:
192    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
193  """
194
195  # Protect against attempts to insert nodes which already belong to some tree.
196  if new_node.parent is not None:
197    raise RuntimeError('inserting node which already has a parent',
198                       (new_node, new_node.parent))
199
200  # The code here is based on pytree.Base.next_sibling
201  parent_of_target = target.parent
202  if parent_of_target is None:
203    raise RuntimeError('expected target node to have a parent', (target,))
204
205  for i, child in enumerate(parent_of_target.children):
206    if child is target:
207      insertion_index = i + 1 if after else i
208      parent_of_target.insert_child(insertion_index, new_node)
209      return
210
211  raise RuntimeError('unable to find insertion point for target node',
212                     (target,))
213
214
215# The following constant and functions implement a simple custom annotation
216# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
217# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
218# managed through GetNodeAnnotation and SetNodeAnnotation.
219_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
220
221
222def GetNodeAnnotation(node, annotation, default=None):
223  """Get annotation value from a node.
224
225  Arguments:
226    node: the node.
227    annotation: annotation name - a string.
228    default: the default value to return if there's no annotation.
229
230  Returns:
231    Value of the annotation in the given node. If the node doesn't have this
232    particular annotation name yet, returns default.
233  """
234  return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
235
236
237def SetNodeAnnotation(node, annotation, value):
238  """Set annotation value on a node.
239
240  Arguments:
241    node: the node.
242    annotation: annotation name - a string.
243    value: annotation value to set.
244  """
245  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
246
247
248def AppendNodeAnnotation(node, annotation, value):
249  """Appends an annotation value to a list of annotations on the node.
250
251  Arguments:
252    node: the node.
253    annotation: annotation name - a string.
254    value: annotation value to set.
255  """
256  attr = GetNodeAnnotation(node, annotation, set())
257  attr.add(value)
258  SetNodeAnnotation(node, annotation, attr)
259
260
261def RemoveSubtypeAnnotation(node, value):
262  """Removes an annotation value from the subtype annotations on the node.
263
264  Arguments:
265    node: the node.
266    value: annotation value to remove.
267  """
268  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
269  if attr and value in attr:
270    attr.remove(value)
271    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
272
273
274def GetOpeningBracket(node):
275  """Get opening bracket value from a node.
276
277  Arguments:
278    node: the node.
279
280  Returns:
281    The opening bracket node or None if it couldn't find one.
282  """
283  return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None)
284
285
286def SetOpeningBracket(node, bracket):
287  """Set opening bracket value for a node.
288
289  Arguments:
290    node: the node.
291    bracket: opening bracket to set.
292  """
293  setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket)
294
295
296def DumpNodeToString(node):
297  """Dump a string representation of the given node. For debugging.
298
299  Arguments:
300    node: the node.
301
302  Returns:
303    The string representation.
304  """
305  if isinstance(node, pytree.Leaf):
306    fmt = ('{name}({value}) [lineno={lineno}, column={column}, '
307           'prefix={prefix}, penalty={penalty}]')
308    return fmt.format(
309        name=NodeName(node),
310        value=_PytreeNodeRepr(node),
311        lineno=node.lineno,
312        column=node.column,
313        prefix=repr(node.prefix),
314        penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None))
315  else:
316    fmt = '{node} [{len} children] [child_indent="{indent}"]'
317    return fmt.format(
318        node=NodeName(node),
319        len=len(node.children),
320        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
321
322
323def _PytreeNodeRepr(node):
324  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
325  if isinstance(node, pytree.Node):
326    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
327                           [_PytreeNodeRepr(c) for c in node.children])
328  if isinstance(node, pytree.Leaf):
329    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
330
331
332def IsCommentStatement(node):
333  return (NodeName(node) == 'simple_stmt' and
334          node.children[0].type == token.COMMENT)
335