1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Subtype assigner for lib2to3 trees.
15
16This module assigns extra type information to the lib2to3 trees. This
17information is more specific than whether something is an operator or an
18identifier. For instance, it can specify if a node in the tree is part of a
19subscript.
20
21  AssignSubtypes(): the main function exported by this module.
22
23Annotations:
24  subtype: The subtype of a pytree token. See 'format_token' module for a list
25      of subtypes.
26"""
27
28from lib2to3 import pytree
29from lib2to3.pgen2 import token
30from lib2to3.pygram import python_symbols as syms
31
32from yapf.yapflib import format_token
33from yapf.yapflib import pytree_utils
34from yapf.yapflib import pytree_visitor
35from yapf.yapflib import style
36
37
def AssignSubtypes(tree):
  """Annotate the tokens of a pytree with their subtypes.

  A fresh _SubtypeAssigner visitor is run over the tree. The annotations are
  written onto the tree itself; nothing is returned.

  Arguments:
    tree: the top-level pytree node to annotate with subtypes.
  """
  _SubtypeAssigner().Visit(tree)
46
47
# Map tokens in argument lists to their respective subtype.
#
# Keys are the literal text of a leaf token; values are the
# format_token.Subtype appended to that leaf by
# _SubtypeAssigner._ProcessArgLists. Leaves whose text is not listed here are
# given format_token.Subtype.NONE by that method instead.
_ARGLIST_TOKEN_TO_SUBTYPE = {
    '=': format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,
    ':': format_token.Subtype.TYPED_NAME,
    '*': format_token.Subtype.VARARGS_STAR,
    '**': format_token.Subtype.KWARGS_STAR_STAR,
}
55
56
class _SubtypeAssigner(pytree_visitor.PyTreeVisitor):
  """_SubtypeAssigner - see file-level docstring for detailed description.

  The subtype is added as an annotation to the pytree token.

  Every Visit_<name> method below handles the grammar node called <name>; the
  production it covers is quoted at the top of the method. The traversal entry
  points self.Visit and self.DefaultNodeVisit are inherited from
  pytree_visitor.PyTreeVisitor.
  """

  def _VisitAndTagLeaves(self, node, values, *subtypes):
    """Visit all children of node and tag the direct leaves found in values.

    Arguments:
      node: the pytree node whose children are traversed.
      values: container of token strings. A direct child leaf whose value is
        an element of it receives the subtypes. Pass a tuple or a set, never
        a plain string: 'in' on a string is a substring test, which would
        also match the empty string and multi-character runs.
      *subtypes: the format_token.Subtype values to append, in order.
    """
    for child in node.children:
      # Descend first so nested constructs are annotated as well.
      self.Visit(child)
      if isinstance(child, pytree.Leaf) and child.value in values:
        for subtype in subtypes:
          _AppendTokenSubtype(child, subtype)

  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
    """Annotate the keys, values and generators of a dict or set display."""
    # dictsetmaker ::= (test ':' test (comp_for |
    #                                   (',' test ':' test)* [','])) |
    #                  (test (comp_for | (',' test)* [',']))
    for child in node.children:
      self.Visit(child)

    comp_for = False
    dict_maker = False

    # First pass: classify the display. A ':' or '**' child means it is a
    # dictionary; a comp_for child means it is a comprehension.
    for child in node.children:
      child_name = pytree_utils.NodeName(child)
      if child_name == 'comp_for':
        comp_for = True
        _AppendFirstLeafTokenSubtype(child,
                                     format_token.Subtype.DICT_SET_GENERATOR)
      elif child_name in ('COLON', 'DOUBLESTAR'):
        dict_maker = True

    # Only plain (non-comprehension) dictionaries get key/value subtypes.
    if comp_for or not dict_maker:
      return

    last_was_colon = False
    unpacking = False
    for child in node.children:
      child_name = pytree_utils.NodeName(child)
      if child_name == 'DOUBLESTAR':
        _AppendFirstLeafTokenSubtype(child,
                                     format_token.Subtype.KWARGS_STAR_STAR)
      if last_was_colon:
        # The child right after a ':' is the dictionary value.
        if style.Get('INDENT_DICTIONARY_VALUE'):
          _InsertPseudoParentheses(child)
        else:
          _AppendFirstLeafTokenSubtype(child,
                                       format_token.Subtype.DICTIONARY_VALUE)
      elif (isinstance(child, pytree.Node) or
            (not child.value.startswith('#') and
             child.value not in ('{', ':', ','))):
        # Mark the first leaf of a key entry as a DICTIONARY_KEY. We
        # normally want to split before them if the dictionary cannot exist
        # on a single line.
        if not unpacking or pytree_utils.FirstLeafNode(child).value == '**':
          _AppendFirstLeafTokenSubtype(child,
                                       format_token.Subtype.DICTIONARY_KEY)
        _AppendSubtypeRec(child, format_token.Subtype.DICTIONARY_KEY_PART)
      last_was_colon = child_name == 'COLON'
      # 'unpacking' stays set from a '**' until the next ':' is seen.
      if child_name == 'DOUBLESTAR':
        unpacking = True
      elif last_was_colon:
        unpacking = False

  def Visit_expr_stmt(self, node):  # pylint: disable=invalid-name
    """Tag '=' in an expression statement as ASSIGN_OPERATOR."""
    # expr_stmt ::= testlist_star_expr (augassign (yield_expr|testlist)
    #               | ('=' (yield_expr|testlist_star_expr))*)
    self._VisitAndTagLeaves(node, ('=',),
                            format_token.Subtype.ASSIGN_OPERATOR)

  def Visit_or_test(self, node):  # pylint: disable=invalid-name
    """Tag 'or' as BINARY_OPERATOR."""
    # or_test ::= and_test ('or' and_test)*
    self._VisitAndTagLeaves(node, ('or',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_and_test(self, node):  # pylint: disable=invalid-name
    """Tag 'and' as BINARY_OPERATOR."""
    # and_test ::= not_test ('and' not_test)*
    self._VisitAndTagLeaves(node, ('and',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_not_test(self, node):  # pylint: disable=invalid-name
    """Tag 'not' as UNARY_OPERATOR."""
    # not_test ::= 'not' not_test | comparison
    self._VisitAndTagLeaves(node, ('not',),
                            format_token.Subtype.UNARY_OPERATOR)

  def Visit_comparison(self, node):  # pylint: disable=invalid-name
    """Tag comparison operators as BINARY_OPERATOR."""
    # comparison ::= expr (comp_op expr)*
    # comp_op ::= '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not in'|'is'|'is not'
    for child in node.children:
      self.Visit(child)
      if (isinstance(child, pytree.Leaf) and
          child.value in {'<', '>', '==', '>=', '<=', '<>', '!=', 'in', 'is'}):
        _AppendTokenSubtype(child, format_token.Subtype.BINARY_OPERATOR)
      elif pytree_utils.NodeName(child) == 'comp_op':
        # Two-word operators ('not in', 'is not') arrive as a comp_op node;
        # tag each of its tokens.
        for grandchild in child.children:
          _AppendTokenSubtype(grandchild, format_token.Subtype.BINARY_OPERATOR)

  def Visit_star_expr(self, node):  # pylint: disable=invalid-name
    """Tag the '*' of a starred expression as UNARY_OPERATOR, VARARGS_STAR."""
    # star_expr ::= '*' expr
    self._VisitAndTagLeaves(node, ('*',),
                            format_token.Subtype.UNARY_OPERATOR,
                            format_token.Subtype.VARARGS_STAR)

  def Visit_expr(self, node):  # pylint: disable=invalid-name
    """Tag '|' as BINARY_OPERATOR."""
    # expr ::= xor_expr ('|' xor_expr)*
    self._VisitAndTagLeaves(node, ('|',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_xor_expr(self, node):  # pylint: disable=invalid-name
    """Tag '^' as BINARY_OPERATOR."""
    # xor_expr ::= and_expr ('^' and_expr)*
    self._VisitAndTagLeaves(node, ('^',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_and_expr(self, node):  # pylint: disable=invalid-name
    """Tag '&' as BINARY_OPERATOR."""
    # and_expr ::= shift_expr ('&' shift_expr)*
    self._VisitAndTagLeaves(node, ('&',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_shift_expr(self, node):  # pylint: disable=invalid-name
    """Tag '<<' and '>>' as BINARY_OPERATOR."""
    # shift_expr ::= arith_expr (('<<'|'>>') arith_expr)*
    self._VisitAndTagLeaves(node, ('<<', '>>'),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_arith_expr(self, node):  # pylint: disable=invalid-name
    """Tag binary '+' and '-' as BINARY_OPERATOR."""
    # arith_expr ::= term (('+'|'-') term)*
    self._VisitAndTagLeaves(node, ('+', '-'),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_term(self, node):  # pylint: disable=invalid-name
    """Tag '*', '/', '%' and '//' as BINARY_OPERATOR."""
    # term ::= factor (('*'|'/'|'%'|'//') factor)*
    self._VisitAndTagLeaves(node, ('*', '/', '%', '//'),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_factor(self, node):  # pylint: disable=invalid-name
    """Tag prefix '+', '-' and '~' as UNARY_OPERATOR."""
    # factor ::= ('+'|'-'|'~') factor | power
    self._VisitAndTagLeaves(node, ('+', '-', '~'),
                            format_token.Subtype.UNARY_OPERATOR)

  def Visit_power(self, node):  # pylint: disable=invalid-name
    """Tag the exponentiation '**' as BINARY_OPERATOR."""
    # power ::= atom trailer* ['**' factor]
    self._VisitAndTagLeaves(node, ('**',),
                            format_token.Subtype.BINARY_OPERATOR)

  def Visit_trailer(self, node):  # pylint: disable=invalid-name
    """Tag the '[' and ']' of a trailer as SUBSCRIPT_BRACKET."""
    # trailer ::= '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
    self._VisitAndTagLeaves(node, ('[', ']'),
                            format_token.Subtype.SUBSCRIPT_BRACKET)

  def Visit_subscript(self, node):  # pylint: disable=invalid-name
    """Tag ':' inside a subscript as SUBSCRIPT_COLON."""
    # subscript ::= test | [test] ':' [test] [sliceop]
    self._VisitAndTagLeaves(node, (':',),
                            format_token.Subtype.SUBSCRIPT_COLON)

  def Visit_sliceop(self, node):  # pylint: disable=invalid-name
    """Tag the ':' of a slice step as SUBSCRIPT_COLON."""
    # sliceop ::= ':' [test]
    self._VisitAndTagLeaves(node, (':',),
                            format_token.Subtype.SUBSCRIPT_COLON)

  def Visit_argument(self, node):  # pylint: disable=invalid-name
    """Annotate the tokens of a single call argument."""
    # argument ::=
    #     test [comp_for] | test '=' test
    self._ProcessArgLists(node)

  def Visit_arglist(self, node):  # pylint: disable=invalid-name
    """Annotate a call's argument list."""
    # arglist ::=
    #     (argument ',')* (argument [',']
    #                     | '*' test (',' argument)* [',' '**' test]
    #                     | '**' test)
    self._ProcessArgLists(node)
    _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,
                       format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST)

  def Visit_tname(self, node):  # pylint: disable=invalid-name
    """Annotate a typed parameter name."""
    # tname ::= NAME [':' test]
    self._ProcessArgLists(node)
    _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,
                       format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST)

  def Visit_decorator(self, node):  # pylint: disable=invalid-name
    """Tag the '@' of a decorator as DECORATOR."""
    # decorator ::=
    #     '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
    for child in node.children:
      if isinstance(child, pytree.Leaf) and child.value == '@':
        _AppendTokenSubtype(child, subtype=format_token.Subtype.DECORATOR)
      self.Visit(child)

  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
    """Tag the name of a function definition as FUNC_DEF."""
    # funcdef ::=
    #     'def' NAME parameters ['->' test] ':' suite
    for child in node.children:
      # 'def' itself is a NAME token too, so it has to be skipped; the first
      # other NAME child is the function's name.
      if pytree_utils.NodeName(child) == 'NAME' and child.value != 'def':
        _AppendTokenSubtype(child, format_token.Subtype.FUNC_DEF)
        break
    for child in node.children:
      self.Visit(child)

  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
    """Annotate the parameter list of a function definition."""
    # typedargslist ::=
    #     ((tfpdef ['=' test] ',')*
    #          ('*' [tname] (',' tname ['=' test])* [',' '**' tname]
    #           | '**' tname)
    #     | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
    self._ProcessArgLists(node)
    _SetArgListSubtype(node, format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN,
                       format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST)
    # 'tname' is True from a tname child until the next ',' or the '=' that
    # introduces its default value.
    tname = False
    for child in node.children:
      if pytree_utils.NodeName(child) == 'tname':
        tname = True
        _SetArgListSubtype(child, format_token.Subtype.TYPED_NAME,
                           format_token.Subtype.TYPED_NAME_ARG_LIST)
      if not isinstance(child, pytree.Leaf):
        continue
      if child.value == ',':
        tname = False
      elif child.value == '=' and tname:
        # The default-value '=' of a typed parameter.
        _AppendTokenSubtype(child, subtype=format_token.Subtype.TYPED_NAME)
        tname = False

  def Visit_varargslist(self, node):  # pylint: disable=invalid-name
    """Annotate the parameter list of a lambda."""
    # varargslist ::=
    #     ((vfpdef ['=' test] ',')*
    #          ('*' [vname] (',' vname ['=' test])*  [',' '**' vname]
    #           | '**' vname)
    #      | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
    # _ProcessArgLists already visits every child, so they must not be
    # visited again here: a second traversal would re-run visitors that
    # rewrite the tree (e.g. the pseudo-parentheses insertion performed for
    # dictionary values in a default argument).
    self._ProcessArgLists(node)
    for child in node.children:
      if isinstance(child, pytree.Leaf) and child.value == '=':
        _AppendTokenSubtype(child, format_token.Subtype.VARARGS_LIST)

  def Visit_comp_for(self, node):  # pylint: disable=invalid-name
    """Annotate the 'for' clause of a comprehension."""
    # comp_for ::= 'for' exprlist 'in' testlist_safe [comp_iter]
    _AppendSubtypeRec(node, format_token.Subtype.COMP_FOR)
    # Mark the previous node as COMP_EXPR unless this is a nested comprehension
    # as these will have the outer comprehension as their previous node.
    attr = pytree_utils.GetNodeAnnotation(node.parent,
                                          pytree_utils.Annotation.SUBTYPE)
    if not attr or format_token.Subtype.COMP_FOR not in attr:
      _AppendSubtypeRec(node.parent.children[0], format_token.Subtype.COMP_EXPR)
    self.DefaultNodeVisit(node)

  def Visit_comp_if(self, node):  # pylint: disable=invalid-name
    """Annotate the 'if' clause of a comprehension."""
    # comp_if ::= 'if' old_test [comp_iter]
    _AppendSubtypeRec(node, format_token.Subtype.COMP_IF)
    self.DefaultNodeVisit(node)

  def _ProcessArgLists(self, node):
    """Common method for processing argument lists.

    Visits every child of node, then gives each direct leaf the subtype
    listed for its text in _ARGLIST_TOKEN_TO_SUBTYPE, or
    format_token.Subtype.NONE when the text is not listed.

    Arguments:
      node: the argument-list-like pytree node to process.
    """
    for child in node.children:
      self.Visit(child)
      if isinstance(child, pytree.Leaf):
        _AppendTokenSubtype(
            child,
            subtype=_ARGLIST_TOKEN_TO_SUBTYPE.get(child.value,
                                                  format_token.Subtype.NONE))
332
333
def _SetArgListSubtype(node, node_subtype, list_subtype):
  """Tag the elements of an arg list when it contains a given subtype.

  If any leaf below node (not looking inside nested 'arglist' nodes) carries
  node_subtype, the first leaf of every non-comma child of node is given
  list_subtype. Otherwise nothing is changed.

  Arguments:
    node: the pytree node holding the argument list.
    node_subtype: the subtype to look for among the leaves.
    list_subtype: the subtype to append when node_subtype is found.
  """

  def CarriesNodeSubtype(candidate):
    """Return True if a leaf at or below candidate has node_subtype."""
    if not isinstance(candidate, pytree.Leaf):
      # Nested argument lists are deliberately not searched.
      return any(
          CarriesNodeSubtype(inner)
          for inner in candidate.children
          if pytree_utils.NodeName(inner) != 'arglist')
    leaf_subtypes = pytree_utils.GetNodeAnnotation(
        candidate, pytree_utils.Annotation.SUBTYPE, set())
    return node_subtype in leaf_subtypes

  if not CarriesNodeSubtype(node):
    return

  for element in node.children:
    if pytree_utils.NodeName(element) == 'COMMA':
      continue
    _AppendFirstLeafTokenSubtype(element, list_subtype)
354
355
def _AppendTokenSubtype(node, subtype):
  """Add subtype to the SUBTYPE annotation of node.

  Thin wrapper around pytree_utils.AppendNodeAnnotation. Whether a subtype
  that is already present gets recorded a second time is decided by that
  helper, not here.

  Arguments:
    node: the pytree token to annotate.
    subtype: the format_token.Subtype value to add.
  """
  annotation = pytree_utils.Annotation.SUBTYPE
  pytree_utils.AppendNodeAnnotation(node, annotation, subtype)
360
361
def _AppendFirstLeafTokenSubtype(node, subtype):
  """Append subtype to the first leaf token found at or below node.

  Arguments:
    node: a pytree leaf, or a node whose leftmost descendant leaf is tagged.
    subtype: the format_token.Subtype value to append.
  """
  # Walk down the leftmost branch until a leaf is reached.
  current = node
  while not isinstance(current, pytree.Leaf):
    current = current.children[0]
  _AppendTokenSubtype(current, subtype)
368
369
def _AppendSubtypeRec(node, subtype, force=True):
  """Append subtype to every leaf at or below node.

  Arguments:
    node: the pytree node (or leaf) to annotate.
    subtype: the format_token.Subtype value to append to each leaf.
    force: only handed on to the recursive calls; it does not influence what
      gets annotated.
  """
  if not isinstance(node, pytree.Leaf):
    for descendant in node.children:
      _AppendSubtypeRec(descendant, subtype, force=force)
    return
  _AppendTokenSubtype(node, subtype)
377
378
def _InsertPseudoParentheses(node):
  """Insert pseudo parentheses so that dicts can be formatted correctly.

  The node is wrapped in a '(' / ')' pair of leaves flagged with
  is_pseudo = True, and the first leaf of the result is tagged
  DICTIONARY_VALUE. The tree is modified in place.

  Arguments:
    node: the pytree node or leaf to wrap.
  """
  # A comment that is the last child of the node is detached here and
  # re-attached after the closing parenthesis further down.
  trailing_comment = None
  if isinstance(node, pytree.Node) and node.children[-1].type == token.COMMENT:
    trailing_comment = node.children[-1].clone()
    node.children[-1].remove()

  first = pytree_utils.FirstLeafNode(node)
  last = pytree_utils.LastLeafNode(node)

  # NOTE(review): '==' on pytree nodes is presumably a value comparison rather
  # than an identity check -- confirm that is what is wanted here.
  if first == last and first.type == token.COMMENT:
    # A comment was inserted before the value, which is a pytree.Leaf.
    # Encompass the dictionary's value into an ATOM node.
    last = first.next_sibling
    wrapper = pytree.Node(syms.atom, [first.clone(), last.clone()])
    node.replace(wrapper)
    node = wrapper
    last.remove()

    first = pytree_utils.FirstLeafNode(node)
    last = pytree_utils.LastLeafNode(node)

  # The opening parenthesis is placed one column before the first leaf.
  open_position = (first.get_lineno(), first.column - 1)
  open_paren = pytree.Leaf(token.LPAR, u'(', context=('', open_position))

  # The closing parenthesis goes right after the last leaf. A string token
  # containing newlines ends on a later line than it starts on, so both the
  # line and the column come from the string's text.
  close_lineno = last.get_lineno()
  if last.type == token.STRING and '\n' in last.value:
    close_lineno += last.value.count('\n')
    close_column = len(last.value.split('\n')[-1]) + 1
  else:
    close_column = last.column + len(last.value) + 1
  close_paren = pytree.Leaf(
      token.RPAR, u')', context=('', (close_lineno, close_column)))

  open_paren.is_pseudo = True
  close_paren.is_pseudo = True

  if not isinstance(node, pytree.Node):
    # A bare leaf cannot hold children: swap it for an atom node that wraps a
    # clone of it between the two parentheses.
    value = node.clone()
    node.replace(pytree.Node(syms.atom, [open_paren, value, close_paren]))
    _AppendFirstLeafTokenSubtype(value, format_token.Subtype.DICTIONARY_VALUE)
    return

  node.insert_child(0, open_paren)
  node.append_child(close_paren)
  if trailing_comment:
    node.append_child(trailing_comment)
  _AppendFirstLeafTokenSubtype(node, format_token.Subtype.DICTIONARY_VALUE)
429