# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pytree nodes with extra formatting information.

This is a thin wrapper around a pytree.Leaf node.
"""

import keyword
import re

from lib2to3.pgen2 import token

from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import style

# Token type used by this module to recognize line-continuation nodes (see
# FormatToken.is_continuation).
CONTINUATION = token.N_TOKENS


class Subtype(object):
  """Subtype information about tokens.

  Gleaned from parsing the code. Helps determine the best formatting.
  """
  NONE = 0
  UNARY_OPERATOR = 1
  BINARY_OPERATOR = 2
  SUBSCRIPT_COLON = 3
  SUBSCRIPT_BRACKET = 4
  DEFAULT_OR_NAMED_ASSIGN = 5
  DEFAULT_OR_NAMED_ASSIGN_ARG_LIST = 6
  VARARGS_LIST = 7
  VARARGS_STAR = 8
  KWARGS_STAR_STAR = 9
  ASSIGN_OPERATOR = 10
  DICTIONARY_KEY = 11
  DICTIONARY_KEY_PART = 12
  DICTIONARY_VALUE = 13
  DICT_SET_GENERATOR = 14
  COMP_EXPR = 21
  COMP_FOR = 15
  COMP_IF = 16
  FUNC_DEF = 17
  DECORATOR = 18
  TYPED_NAME = 19
  TYPED_NAME_ARG_LIST = 20


def _TabbedContinuationAlignPadding(spaces, align_style, tab_width,
                                    continuation_indent_width):
  """Build padding string for continuation alignment in tabbed indentation.

  Arguments:
    spaces: (int) The number of spaces to place before the token for alignment.
    align_style: (str) The alignment style for continuation lines.
    tab_width: (int) Number of columns of each tab character.
    continuation_indent_width: (int) Indent columns for line continuations.

  Returns:
    A padding string for alignment with style specified by align_style option:
    whole tabs for 'FIXED' (empty when spaces is not positive), enough tabs to
    cover `spaces` columns for 'VALIGN-RIGHT', and plain spaces otherwise.
  """
  if align_style == 'FIXED':
    if spaces > 0:
      # Integer floor division: no float round-trip for the tab count.
      return '\t' * (continuation_indent_width // tab_width)
    return ''
  elif align_style == 'VALIGN-RIGHT':
    # Ceiling division: round the requested columns up to whole tabs.
    return '\t' * ((spaces + tab_width - 1) // tab_width)
  return ' ' * spaces


class FormatToken(object):
  """A wrapper around pytree Leaf nodes.

  This represents the token plus additional information useful for reformatting
  the code.

  Attributes:
    node: The pytree leaf node being wrapped.
    value: The node's string value (right-stripped for continuation tokens).
    next_token: The token in the unwrapped line after this token or None if this
      is the last token in the unwrapped line.
    previous_token: The token in the unwrapped line before this token or None if
      this is the first token in the unwrapped line.
    matching_bracket: If a bracket token ('[', '{', or '(') the matching
      bracket.
    container_opening: If the object is in a container, this points to its
      opening bracket.
    container_elements: If this is the start of a container, a list of the
      elements in the container.
    whitespace_prefix: The prefix for the whitespace.
    spaces_required_before: The number of spaces required before a token. This
      is a lower-bound for the formatter and not a hard requirement. For
      instance, a comment may have n required spaces before it. But the
      formatter won't place n spaces before all comments. Only those that are
      moved to the end of a line of code. The formatter may use different
      spacing when appropriate.
    can_break_before: True if we're allowed to break before this token.
    must_break_before: True if we're required to break before this token.
    total_length: The total length of the unwrapped line up to and including
      whitespace and this token. However, this doesn't include the initial
      indentation amount.
    split_penalty: The penalty for splitting the line before this token.
  """

  def __init__(self, node):
    """Constructor.

    Arguments:
      node: (pytree.Leaf) The node that's being wrapped.
    """
    self.node = node
    self.next_token = None
    self.previous_token = None
    self.matching_bracket = None
    self.container_opening = None
    self.container_elements = []
    self.whitespace_prefix = ''
    self.can_break_before = False
    self.must_break_before = False
    self.total_length = 0  # TODO(morbo): Think up a better name.
    self.split_penalty = 0

    # Only comments start out with a style-driven minimum spacing.
    if self.is_comment:
      self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')
    else:
      self.spaces_required_before = 0

    # Continuation tokens drop trailing whitespace from the node's value.
    if self.is_continuation:
      self.value = self.node.value.rstrip()
    else:
      self.value = self.node.value

  def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
    """Register a token's whitespace prefix.

    This is the whitespace that will be output before a token's string. For
    comment tokens the node's value is also rewritten so that every line of
    the comment is left-stripped and re-joined using the computed indentation.

    Arguments:
      newlines_before: (int) The number of newlines to place before the token.
      spaces: (int) The number of spaces to place before the token.
      indent_level: (int) The indentation level.
    """
    if style.Get('USE_TABS'):
      if newlines_before > 0:
        # Token starts a new line: alignment padding follows the configured
        # continuation alignment style.
        indent_before = '\t' * indent_level + _TabbedContinuationAlignPadding(
            spaces, style.Get('CONTINUATION_ALIGN_STYLE'),
            style.Get('INDENT_WIDTH'), style.Get('CONTINUATION_INDENT_WIDTH'))
      else:
        indent_before = '\t' * indent_level + ' ' * spaces
    else:
      indent_before = (
          ' ' * indent_level * style.Get('INDENT_WIDTH') + ' ' * spaces)

    if self.is_comment:
      comment_lines = [s.lstrip() for s in self.value.splitlines()]
      self.node.value = ('\n' + indent_before).join(comment_lines)

      # Update our own value since we are changing node value
      self.value = self.node.value

    if not self.whitespace_prefix:
      # The node's own NEWLINES annotation, when truthy, wins over the
      # caller-supplied count.
      self.whitespace_prefix = (
          '\n' * (self.newlines or newlines_before) + indent_before)
    else:
      self.whitespace_prefix += indent_before

  def AdjustNewlinesBefore(self, newlines_before):
    """Change the number of newlines before this token."""
    self.whitespace_prefix = (
        '\n' * newlines_before + self.whitespace_prefix.lstrip('\n'))

  def RetainHorizontalSpacing(self, first_column, depth):
    """Retains a token's horizontal spacing.

    Sets spaces_required_before from the token's original source position.
    Does nothing when there is no previous token (after skipping one pseudo
    paren).

    Arguments:
      first_column: Column subtracted from this token's original column when
        the token did not share a line with the previous token.
      depth: Multiplied by the INDENT_WIDTH style setting and added to the
        spacing in that same case.
    """
    previous = self.previous_token
    if not previous:
      return

    if previous.is_pseudo_paren:
      previous = previous.previous_token
      if not previous:
        return

    cur_lineno = self.lineno
    prev_lineno = previous.lineno
    if previous.is_multiline_string:
      # Compare against the line on which the multiline string ends.
      prev_lineno += previous.value.count('\n')

    # NOTE(review): previous.previous_token is dereferenced without a None
    # check below; presumably a pseudo paren always has a predecessor here --
    # confirm.
    if (cur_lineno != prev_lineno or
        (previous.is_pseudo_paren and previous.value != ')' and
         cur_lineno != previous.previous_token.lineno)):
      self.spaces_required_before = (
          self.column - first_column + depth * style.Get('INDENT_WIDTH'))
      return

    cur_column = self.node.column
    prev_column = previous.node.column
    prev_len = len(previous.value)

    if previous.is_pseudo_paren and previous.value == ')':
      prev_column -= 1
      prev_len = 0

    if previous.is_multiline_string:
      prev_len = len(previous.value.split('\n')[-1])
      if '\n' in previous.value:
        prev_column = 0  # Last line starts in column 0.

    self.spaces_required_before = cur_column - (prev_column + prev_len)

  def OpensScope(self):
    """Return True if the token's value is an opening bracket."""
    return self.value in pytree_utils.OPENING_BRACKETS

  def ClosesScope(self):
    """Return True if the token's value is a closing bracket."""
    return self.value in pytree_utils.CLOSING_BRACKETS

  def __repr__(self):
    """Debug representation showing name, value and the pseudo-paren flag."""
    msg = 'FormatToken(name={0}, value={1}'.format(self.name, self.value)
    msg += ', pseudo)' if self.is_pseudo_paren else ')'
    return msg

  # NOTE(review): several properties below are wrapped in
  # py3compat.lru_cache(), which is defined outside this file. If it memoizes
  # per `self`, each result is fixed after first access -- confirm nothing
  # re-annotates the node or changes `value` afterwards.

  @property
  @py3compat.lru_cache()
  def node_split_penalty(self):
    """Split penalty attached to the pytree node of this token."""
    return pytree_utils.GetNodeAnnotation(
        self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0)

  @property
  def newlines(self):
    """The number of newlines needed before this token."""
    return pytree_utils.GetNodeAnnotation(self.node,
                                          pytree_utils.Annotation.NEWLINES)

  @property
  def must_split(self):
    """Return true if the token requires a split before it."""
    return pytree_utils.GetNodeAnnotation(self.node,
                                          pytree_utils.Annotation.MUST_SPLIT)

  @property
  def column(self):
    """The original column number of the node in the source."""
    return self.node.column

  @property
  def lineno(self):
    """The original line number of the node in the source."""
    return self.node.lineno

  @property
  @py3compat.lru_cache()
  def subtypes(self):
    """Extra type information for directing formatting.

    Falls back to [Subtype.NONE] when the node carries no SUBTYPE annotation.
    """
    value = pytree_utils.GetNodeAnnotation(self.node,
                                           pytree_utils.Annotation.SUBTYPE)
    return [Subtype.NONE] if value is None else value

  @property
  @py3compat.lru_cache()
  def is_binary_op(self):
    """Token is a binary operator."""
    return Subtype.BINARY_OPERATOR in self.subtypes

  @property
  @py3compat.lru_cache()
  def name(self):
    """A string representation of the node's name."""
    return pytree_utils.NodeName(self.node)

  @property
  def is_comment(self):
    """Token is a comment."""
    return self.node.type == token.COMMENT

  @property
  def is_continuation(self):
    """Token's node type is this module's CONTINUATION type."""
    return self.node.type == CONTINUATION

  @property
  @py3compat.lru_cache()
  def is_keyword(self):
    """Token's value is a Python keyword."""
    return keyword.iskeyword(self.value)

  @property
  @py3compat.lru_cache()
  def is_name(self):
    """Token is a NAME that is not a keyword."""
    return self.node.type == token.NAME and not self.is_keyword

  @property
  def is_number(self):
    """Token is a number."""
    return self.node.type == token.NUMBER

  @property
  def is_string(self):
    """Token is a string."""
    return self.node.type == token.STRING

  @property
  @py3compat.lru_cache()
  def is_multiline_string(self):
    """A multiline string."""
    if py3compat.PY3:
      prefix = '('
      prefix += 'r|u|R|U|f|F|fr|Fr|fR|FR|rf|rF|Rf|RF'  # strings
      prefix += '|b|B|br|Br|bR|BR|rb|rB|Rb|RB'  # bytes
      prefix += ')?'
    else:
      prefix = '[uUbB]?[rR]?'

    # Optional prefix, a triple-quote delimiter, and the same delimiter at the
    # end; DOTALL lets the body span newlines.
    regex = r'^{prefix}(?P<delim>"""|\'\'\').*(?P=delim)$'.format(prefix=prefix)
    return (self.is_string and
            re.match(regex, self.value, re.DOTALL) is not None)

  @property
  @py3compat.lru_cache()
  def is_docstring(self):
    """Token is a multiline string whose node has no previous sibling."""
    return self.is_multiline_string and not self.node.prev_sibling

  @property
  @py3compat.lru_cache()
  def is_pseudo_paren(self):
    """Token's node has a truthy `is_pseudo` attribute."""
    return hasattr(self.node, 'is_pseudo') and self.node.is_pseudo

  @property
  def is_pylint_comment(self):
    """Token is a comment containing a `pylint: disable=`/`enable=` pragma."""
    # Always a bool, consistent with the other is_* properties.
    return self.is_comment and (re.match(r'#.*\bpylint:\s*(disable|enable)=',
                                         self.value) is not None)

  @property
  def is_pytype_comment(self):
    """Token is a comment containing a `pytype: disable=`/`enable=` pragma."""
    # Always a bool, consistent with the other is_* properties.
    return self.is_comment and (re.match(r'#.*\bpytype:\s*(disable|enable)=',
                                         self.value) is not None)