1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Pytree nodes with extra formatting information.
15
16This is a thin wrapper around a pytree.Leaf node.
17"""
18
19import keyword
20import re
21
22from lib2to3.pgen2 import token
23
24from yapf.yapflib import py3compat
25from yapf.yapflib import pytree_utils
26from yapf.yapflib import style
27
# Node type that marks a line-continuation token; FormatToken.is_continuation
# compares a node's type against this value. It is set to token.N_TOKENS,
# presumably so it cannot collide with a real lib2to3 token type -- TODO
# confirm against lib2to3.pgen2.token.
CONTINUATION = token.N_TOKENS
29
30
class Subtype(object):
  """Integer tags describing the role a token plays in the parsed code.

  The tags are gleaned from parsing the code and help determine the best
  formatting. Every constant has a distinct integer value; the constants are
  grouped below by related names, not by numeric order.
  """
  # No specific subtype.
  NONE = 0

  # Operators.
  UNARY_OPERATOR = 1
  BINARY_OPERATOR = 2
  ASSIGN_OPERATOR = 10

  # Subscripts.
  SUBSCRIPT_COLON = 3
  SUBSCRIPT_BRACKET = 4

  # Argument lists and typed names.
  DEFAULT_OR_NAMED_ASSIGN = 5
  DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6
  VARARGS_LIST = 7
  VARARGS_STAR = 8
  KWARGS_STAR_STAR = 9
  TYPED_NAME = 19
  TYPED_NAME_ARG_LIST = 20

  # Dictionaries and sets.
  DICTIONARY_KEY = 11
  DICTIONARY_KEY_PART = 12
  DICTIONARY_VALUE = 13
  DICT_SET_GENERATOR = 14

  # Comprehensions. COMP_EXPR is deliberately 21, not part of the 15-16 run.
  COMP_EXPR = 21
  COMP_FOR = 15
  COMP_IF = 16

  # Definitions.
  FUNC_DEF = 17
  DECORATOR = 18
58
59
def _TabbedContinuationAlignPadding(spaces, align_style, tab_width,
                                    continuation_indent_width):
  """Return the alignment padding for a continuation line when using tabs.

  Arguments:
    spaces: (int) The number of spaces to place before the token for alignment.
    align_style: (str) The alignment style for continuation lines. 'FIXED' and
      'VALIGN-RIGHT' produce tabs; any other value produces plain spaces.
    tab_width: (int) Number of columns of each tab character.
    continuation_indent_width: (int) Indent columns for line continuations.

  Returns:
    The padding string. For 'VALIGN-RIGHT' it is `spaces` rounded up to a whole
    number of tabs. For 'FIXED' it is continuation_indent_width / tab_width
    tabs (truncated) when spaces is positive, otherwise the empty string. For
    every other style it is `spaces` space characters.
  """
  if align_style == 'VALIGN-RIGHT':
    # Round the requested columns up to the next multiple of the tab width.
    tab_count = int((spaces + tab_width - 1) / tab_width)
    return '\t' * tab_count

  if align_style != 'FIXED':
    return ' ' * spaces

  # 'FIXED': the amount of alignment requested is irrelevant, only whether
  # any was requested at all.
  if spaces <= 0:
    return ''
  tab_count = int(continuation_indent_width / tab_width)
  return '\t' * tab_count
80
81
class FormatToken(object):
  """A pytree Leaf node plus the state needed to reformat it.

  Each instance wraps one leaf and carries the additional information the
  formatter records about that token.

  Attributes:
    node: The wrapped pytree leaf.
    value: The token text; trailing whitespace is stripped for continuation
      tokens.
    next_token: The following token in the unwrapped line, or None if this is
      the last token in the unwrapped line.
    previous_token: The preceding token in the unwrapped line, or None if this
      is the first token in the unwrapped line.
    matching_bracket: For a bracket token ('[', '{', or '('), the matching
      bracket.
    container_opening: For a token inside a container, the container's opening
      bracket.
    container_elements: For a token that starts a container, a list of the
      elements in the container.
    whitespace_prefix: The whitespace emitted in front of the token.
    spaces_required_before: Lower bound on the number of spaces before the
      token. It is advisory, not mandatory: a comment may ask for n spaces,
      but only comments moved to the end of a line of code receive them, and
      the formatter may use different spacing when appropriate.
    can_break_before: True if a line break is allowed before this token.
    must_break_before: True if a line break is required before this token.
    total_length: Length of the unwrapped line up to and including this token
      and its whitespace, not counting the initial indentation.
    split_penalty: The penalty for splitting the line before this token.
  """

  def __init__(self, node):
    """Wrap a pytree leaf.

    Arguments:
      node: (pytree.Leaf) The node that's being wrapped.
    """
    self.node = node

    # Links to neighbouring tokens and brackets; filled in by later passes.
    self.next_token = None
    self.previous_token = None
    self.matching_bracket = None
    self.container_opening = None
    self.container_elements = []

    # Layout state.
    self.whitespace_prefix = ''
    self.can_break_before = False
    self.must_break_before = False
    self.total_length = 0  # TODO(morbo): Think up a better name.
    self.split_penalty = 0

    # Only comments start out with a required amount of leading space.
    self.spaces_required_before = (
        style.Get('SPACES_BEFORE_COMMENT') if self.is_comment else 0)

    # Continuation tokens drop their trailing whitespace.
    self.value = (
        self.node.value.rstrip() if self.is_continuation else self.node.value)

  def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
    """Record the whitespace that is output before this token's string.

    For comment tokens the comment text itself is re-indented as well: every
    line is left-stripped and the lines are re-joined with a newline followed
    by the computed indentation.

    If the token has no prefix yet, the prefix becomes a run of newlines
    followed by the indentation; the node's NEWLINES annotation, when truthy,
    takes precedence over newlines_before. Otherwise the indentation is
    appended to the existing prefix.

    Arguments:
      newlines_before: (int) The number of newlines to place before the token.
      spaces: (int) The number of spaces to place before the token.
      indent_level: (int) The indentation level.
    """
    if not style.Get('USE_TABS'):
      indent_before = (
          ' ' * indent_level * style.Get('INDENT_WIDTH') + ' ' * spaces)
    elif newlines_before > 0:
      # The token starts a line: alignment follows the continuation style.
      tabs = '\t' * indent_level
      padding = _TabbedContinuationAlignPadding(
          spaces, style.Get('CONTINUATION_ALIGN_STYLE'),
          style.Get('INDENT_WIDTH'), style.Get('CONTINUATION_INDENT_WIDTH'))
      indent_before = tabs + padding
    else:
      indent_before = '\t' * indent_level + ' ' * spaces

    if self.is_comment:
      separator = '\n' + indent_before
      stripped_lines = [line.lstrip() for line in self.value.splitlines()]
      self.node.value = separator.join(stripped_lines)

      # Keep our cached text in sync with the node we just rewrote.
      self.value = self.node.value

    if self.whitespace_prefix:
      self.whitespace_prefix += indent_before
    else:
      newline_count = self.newlines or newlines_before
      self.whitespace_prefix = '\n' * newline_count + indent_before

  def AdjustNewlinesBefore(self, newlines_before):
    """Replace the leading newlines of the prefix with newlines_before ones."""
    leading_newlines = '\n' * newlines_before
    self.whitespace_prefix = (
        leading_newlines + self.whitespace_prefix.lstrip('\n'))

  def RetainHorizontalSpacing(self, first_column, depth):
    """Set spaces_required_before so the token keeps its horizontal spacing.

    Does nothing when there is no previous token (a pseudo paren directly
    before this token is skipped once when looking for it).

    Arguments:
      first_column: Column subtracted from this token's original column when
        the token is treated as being on a different line than the previous
        token.
      depth: Number of indentation levels; multiplied by INDENT_WIDTH and
        added in that same case.
    """
    prev_tok = self.previous_token
    if prev_tok and prev_tok.is_pseudo_paren:
      prev_tok = prev_tok.previous_token
    if not prev_tok:
      return

    this_line = self.lineno
    prev_line = prev_tok.lineno
    if prev_tok.is_multiline_string:
      # Compare against the line on which the multiline string ends.
      prev_line += prev_tok.value.count('\n')

    on_different_line = this_line != prev_line
    if not on_different_line:
      on_different_line = (
          prev_tok.is_pseudo_paren and prev_tok.value != ')' and
          this_line != prev_tok.previous_token.lineno)
    if on_different_line:
      self.spaces_required_before = (
          self.column - first_column + depth * style.Get('INDENT_WIDTH'))
      return

    this_column = self.node.column
    prev_start = prev_tok.node.column
    prev_width = len(prev_tok.value)

    if prev_tok.is_pseudo_paren and prev_tok.value == ')':
      prev_start -= 1
      prev_width = 0

    if prev_tok.is_multiline_string:
      prev_width = len(prev_tok.value.split('\n')[-1])
      if '\n' in prev_tok.value:
        prev_start = 0  # Last line starts in column 0.

    self.spaces_required_before = this_column - (prev_start + prev_width)

  def OpensScope(self):
    """Return True if the token's value is one of the opening brackets."""
    return self.value in pytree_utils.OPENING_BRACKETS

  def ClosesScope(self):
    """Return True if the token's value is one of the closing brackets."""
    return self.value in pytree_utils.CLOSING_BRACKETS

  def __repr__(self):
    """Return a debug string with the token's name, value and pseudo flag."""
    text = 'FormatToken(name={0}, value={1}'.format(self.name, self.value)
    if self.is_pseudo_paren:
      return text + ', pseudo)'
    return text + ')'

  @property
  @py3compat.lru_cache()
  def node_split_penalty(self):
    """Split penalty attached to the pytree node of this token."""
    annotation = pytree_utils.Annotation.SPLIT_PENALTY
    return pytree_utils.GetNodeAnnotation(self.node, annotation, default=0)

  @property
  def newlines(self):
    """The number of newlines needed before this token."""
    annotation = pytree_utils.Annotation.NEWLINES
    return pytree_utils.GetNodeAnnotation(self.node, annotation)

  @property
  def must_split(self):
    """Return true if the token requires a split before it."""
    annotation = pytree_utils.Annotation.MUST_SPLIT
    return pytree_utils.GetNodeAnnotation(self.node, annotation)

  @property
  def column(self):
    """The original column number of the node in the source."""
    return self.node.column

  @property
  def lineno(self):
    """The original line number of the node in the source."""
    return self.node.lineno

  @property
  @py3compat.lru_cache()
  def subtypes(self):
    """Extra type information for directing formatting.

    Returns the node's SUBTYPE annotation, or [Subtype.NONE] when the node has
    no such annotation.
    """
    annotated = pytree_utils.GetNodeAnnotation(
        self.node, pytree_utils.Annotation.SUBTYPE)
    if annotated is None:
      return [Subtype.NONE]
    return annotated

  @property
  @py3compat.lru_cache()
  def is_binary_op(self):
    """Token is a binary operator."""
    return Subtype.BINARY_OPERATOR in self.subtypes

  @property
  @py3compat.lru_cache()
  def name(self):
    """A string representation of the node's name."""
    return pytree_utils.NodeName(self.node)

  @property
  def is_comment(self):
    """True if the wrapped node is a COMMENT token."""
    return self.node.type == token.COMMENT

  @property
  def is_continuation(self):
    """True if the wrapped node has the CONTINUATION type."""
    return self.node.type == CONTINUATION

  @property
  @py3compat.lru_cache()
  def is_keyword(self):
    """True if the token's value is a Python keyword."""
    return keyword.iskeyword(self.value)

  @property
  @py3compat.lru_cache()
  def is_name(self):
    """True if the wrapped node is a NAME token that is not a keyword."""
    if self.node.type != token.NAME:
      return False
    return not self.is_keyword

  @property
  def is_number(self):
    """True if the wrapped node is a NUMBER token."""
    return self.node.type == token.NUMBER

  @property
  def is_string(self):
    """True if the wrapped node is a STRING token."""
    return self.node.type == token.STRING

  @property
  @py3compat.lru_cache()
  def is_multiline_string(self):
    """True if the token is a triple-quoted string.

    The value must be a STRING token that starts with an optional string
    prefix followed by a triple quote and ends with the same triple quote.
    """
    if py3compat.PY3:
      prefix = ''.join([
          '(',
          'r|u|R|U|f|F|fr|Fr|fR|FR|rf|rF|Rf|RF',  # strings
          '|b|B|br|Br|bR|BR|rb|rB|Rb|RB',  # bytes
          ')?',
      ])
    else:
      prefix = '[uUbB]?[rR]?'

    regex = r'^{prefix}(?P<delim>"""|\'\'\').*(?P=delim)$'.format(prefix=prefix)
    if not self.is_string:
      return False
    return re.match(regex, self.value, re.DOTALL) is not None

  @property
  @py3compat.lru_cache()
  def is_docstring(self):
    """True if the token is a multiline string whose node has no prev_sibling."""
    if not self.is_multiline_string:
      return False
    return not self.node.prev_sibling

  @property
  @py3compat.lru_cache()
  def is_pseudo_paren(self):
    """The node's is_pseudo attribute, or False if the node has none."""
    if not hasattr(self.node, 'is_pseudo'):
      return False
    return self.node.is_pseudo

  @property
  def is_pylint_comment(self):
    """Truthy if the token is a comment with a pylint disable/enable pragma.

    Returns False for non-comments, otherwise the result of re.match (a match
    object or None).
    """
    if not self.is_comment:
      return False
    return re.match(r'#.*\bpylint:\s*(disable|enable)=', self.value)

  @property
  def is_pytype_comment(self):
    """Truthy if the token is a comment with a pytype disable/enable pragma.

    Returns False for non-comments, otherwise the result of re.match (a match
    object or None).
    """
    if not self.is_comment:
      return False
    return re.match(r'#.*\bpytype:\s*(disable|enable)=', self.value)
340