# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""UnwrappedLine primitive for formatting.

An unwrapped line is the containing data structure produced by the parser. It
collects all nodes (stored in FormatToken objects) that could appear on a
single line if there were no line length restrictions. It's then used by the
parser to perform the wrapping required to comply with the style guide.
"""

from yapf.yapflib import format_token
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import split_penalty
from yapf.yapflib import style


class UnwrappedLine(object):
  """Represents a single unwrapped line in the output.

  Attributes:
    depth: indentation depth of this line. This is just a numeric value used to
      distinguish lines that are more deeply nested than others. It is not the
      actual amount of spaces, which is style-dependent.
    disable: when True, Split() returns this line unchanged instead of
      splitting it at semicolons. Defaults to False.
  """

  def __init__(self, depth, tokens=None):
    """Constructor.

    Creates a new unwrapped line with the given depth and an initial list of
    tokens. Constructs the doubly-linked lists for format tokens using their
    built-in next_token and previous_token attributes.

    Arguments:
      depth: indentation depth of this line
      tokens: initial list of tokens
    """
    self.depth = depth
    self._tokens = tokens or []
    self.disable = False

    # Set up a doubly linked list by linking every adjacent pair of tokens.
    # With zero or one token there are no pairs and nothing is linked.
    for prev_tok, tok in zip(self._tokens, self._tokens[1:]):
      tok.previous_token = prev_tok
      prev_tok.next_token = tok

  def CalculateFormattingInformation(self):
    """Calculate the split penalty and total length for the tokens.

    For every token after the first this fills in spaces_required_before,
    total_length, split_penalty, must_break_before and can_break_before. The
    line must contain at least one token, because the first token is accessed
    unconditionally.
    """
    # Say that the first token in the line should have a space before it. This
    # means only that if this unwrapped line is joined with a predecessor line,
    # then there will be a space between them.
    self.first.spaces_required_before = 1
    self.first.total_length = len(self.first.value)

    prev_token = self.first
    prev_length = self.first.total_length
    for token in self._tokens[1:]:
      if (token.spaces_required_before == 0 and
          _SpaceRequiredBetween(prev_token, token)):
        token.spaces_required_before = 1

      # Pseudo parens do not contribute any width of their own.
      tok_len = len(token.value) if not token.is_pseudo_paren else 0
      token.total_length = prev_length + tok_len + token.spaces_required_before

      # The split penalty has to be computed before {must|can}_break_before,
      # because these may use it for their decision.
      token.split_penalty += _SplitPenalty(prev_token, token)
      token.must_break_before = _MustBreakBefore(prev_token, token)
      token.can_break_before = (
          token.must_break_before or _CanBreakBefore(prev_token, token))

      prev_length = token.total_length
      prev_token = token

  def Split(self):
    """Split the line at semicolons.

    The semicolon tokens themselves are dropped. The remaining tokens are
    shared with (not copied from) this line, and their previous_token /
    next_token links are rewritten so that each resulting line is a
    self-contained linked list. The first token's node of every resulting line
    is annotated with MUST_SPLIT.

    Returns:
      A list of UnwrappedLine objects. This is [self] if the line has no
      semicolon or if splitting is disabled for it.
    """
    if not self.has_semicolon or self.disable:
      return [self]

    uwlines = []
    uwline = UnwrappedLine(self.depth)
    for tok in self._tokens:
      if tok.value == ';':
        # Skip empty segments (a leading or doubled semicolon): an empty line
        # has no first/last token for the fix-up loop below to work on.
        if uwline.tokens:
          uwlines.append(uwline)
        uwline = UnwrappedLine(self.depth)
      else:
        uwline.AppendToken(tok)

    if uwline.tokens:
      uwlines.append(uwline)

    for split_line in uwlines:
      pytree_utils.SetNodeAnnotation(split_line.first.node,
                                     pytree_utils.Annotation.MUST_SPLIT, True)
      # AppendToken leaves the first token linked to whatever preceded it in
      # the original line (and the last token to its old successor), so cut
      # those links explicitly.
      split_line.first.previous_token = None
      split_line.last.next_token = None

    return uwlines

  ############################################################################
  # Token Access and Manipulation Methods                                    #
  ############################################################################

  def AppendToken(self, token):
    """Append a new FormatToken to the tokens contained in this line.

    The token is linked to the current last token, if there is one.

    Arguments:
      token: the FormatToken to append
    """
    if self._tokens:
      token.previous_token = self.last
      self.last.next_token = token
    self._tokens.append(token)

  def AppendNode(self, node):
    """Convenience method to append a pytree node directly.

    Wraps the node with a FormatToken.

    Arguments:
      node: the node to append
    """
    self.AppendToken(format_token.FormatToken(node))

  @property
  def first(self):
    """Returns the first token of this line."""
    return self._tokens[0]

  @property
  def last(self):
    """Returns the last token of this line."""
    return self._tokens[-1]

  ############################################################################
  # Token -> String Methods                                                  #
  ############################################################################

  def AsCode(self, indent_per_depth=2):
    """Return a "code" representation of this line.

    The code representation shows how the line would be printed out as code.

    TODO(eliben): for now this is rudimentary for debugging - once we add
    formatting capabilities, this method will have other uses (not all tokens
    have spaces around them, for example).

    Arguments:
      indent_per_depth: how many spaces to indent per depth level.

    Returns:
      A string representing the line as code.
    """
    indent = ' ' * indent_per_depth * self.depth
    tokens_str = ' '.join(tok.value for tok in self._tokens)
    return indent + tokens_str

  def __str__(self):  # pragma: no cover
    return self.AsCode()

  def __repr__(self):  # pragma: no cover
    tokens_repr = ','.join(
        ['{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens])
    return 'UnwrappedLine(depth={0}, tokens=[{1}])'.format(
        self.depth, tokens_repr)

  ############################################################################
  # Properties                                                               #
  ############################################################################

  @property
  def tokens(self):
    """Access the tokens contained within this line.

    The caller must not modify the tokens list returned by this method.

    Returns:
      List of tokens in this line.
    """
    return self._tokens

  @property
  def lineno(self):
    """Return the line number of this unwrapped line.

    Returns:
      The line number of the first token in this unwrapped line.
    """
    return self.first.lineno

  @property
  def is_comment(self):
    """Whether the first token of this line is a comment."""
    return self.first.is_comment

  @property
  def has_semicolon(self):
    """Whether any token of this line is a semicolon."""
    return any(tok.value == ';' for tok in self._tokens)


def _IsIdNumberStringToken(tok):
  """Return True if the token is a keyword, name, number, or string."""
  return tok.is_keyword or tok.is_name or tok.is_number or tok.is_string


def _IsUnaryOperator(tok):
  """Return True if the token carries the UNARY_OPERATOR subtype."""
  return format_token.Subtype.UNARY_OPERATOR in tok.subtypes


def _SpaceRequiredBetween(left, right):
  """Return True if a space is required between the left and right token.

  The rules below are evaluated in order and the first one that matches
  decides, so their relative order is significant.

  Arguments:
    left: the token on the left-hand side
    right: the token immediately following 'left'

  Returns:
    A truthy value if a space should separate the two tokens. Some rules
    return the value of a style setting directly.
  """
  lval = left.value
  rval = right.value
  if (left.is_pseudo_paren and _IsIdNumberStringToken(right) and
      left.previous_token and _IsIdNumberStringToken(left.previous_token)):
    # Space between keyword... tokens and pseudo parens.
    return True
  if left.is_pseudo_paren or right.is_pseudo_paren:
    # There should be a space after the ':' in a dictionary.
    if left.OpensScope():
      return True
    # The closing pseudo-paren shouldn't affect spacing.
    return False
  if left.is_continuation or right.is_continuation:
    # The continuation node's value has all of the spaces it needs.
    return False
  if right.name in pytree_utils.NONSEMANTIC_TOKENS:
    # No space before a non-semantic token.
    return False
  if _IsIdNumberStringToken(left) and _IsIdNumberStringToken(right):
    # Spaces between keyword, string, number, and identifier tokens.
    return True
  if lval == ',' and rval == ':':
    # We do want a space between a comma and colon.
    return True
  if rval in ':,':
    # Otherwise, we never want a space before a colon or comma.
    return False
  if lval == ',' and rval in ']})':
    # Add a space between ending ',' and closing bracket if requested.
    return style.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET')
  if lval == ',':
    # We want a space after a comma.
    return True
  if lval == 'from' and rval == '.':
    # Space before the '.' in an import statement.
    return True
  if lval == '.' and rval == 'import':
    # Space after the '.' in an import statement.
    return True
  if (lval == '=' and rval == '.' and
      format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN not in left.subtypes):
    # Space between equal and '.' as in "X = ...".
    return True
  if ((right.is_keyword or right.is_name) and
      (left.is_keyword or left.is_name)):
    # Don't merge two keywords/identifiers.
    return True
  if (format_token.Subtype.SUBSCRIPT_COLON in left.subtypes or
      format_token.Subtype.SUBSCRIPT_COLON in right.subtypes):
    # A subscript shouldn't have spaces separating its colons.
    return False
  if (format_token.Subtype.TYPED_NAME in left.subtypes or
      format_token.Subtype.TYPED_NAME in right.subtypes):
    # A typed argument should have a space after the colon.
    return True
  if left.is_string:
    if (rval == '=' and
        format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in right.subtypes
       ):
      # If there is a type hint, then we don't want to add a space between the
      # equal sign and the hint.
      return False
    if rval not in '[)]}.':
      # A string followed by something other than a subscript, closing bracket,
      # or dot should have a space after it.
      return True
  if left.is_binary_op and lval != '**' and _IsUnaryOperator(right):
    # Space between the binary operator and the unary operator.
    return True
  if left.is_keyword and _IsUnaryOperator(right):
    # Handle things like "not -3 < x".
    return True
  if _IsUnaryOperator(left) and _IsUnaryOperator(right):
    # No space between two unary operators.
    return False
  if left.is_binary_op or right.is_binary_op:
    if lval == '**' or rval == '**':
      # Space around the "power" operator.
      return style.Get('SPACES_AROUND_POWER_OPERATOR')
    # Enforce spaces around binary operators except the blacklisted ones.
    blacklist = style.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS')
    return lval not in blacklist and rval not in blacklist
  if (_IsUnaryOperator(left) and lval != 'not' and
      (right.is_name or right.is_number or rval == '(')):
    # The previous token was a unary op. No space is desired between it and
    # the current token.
    return False
  if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in left.subtypes or
      format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in right.subtypes):
    # Whether a named argument or default parameter has spaces around its '='
    # is controlled by the style.
    return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (format_token.Subtype.VARARGS_LIST in left.subtypes or
      format_token.Subtype.VARARGS_LIST in right.subtypes):
    return False
  if (format_token.Subtype.VARARGS_STAR in left.subtypes or
      format_token.Subtype.KWARGS_STAR_STAR in left.subtypes):
    # Don't add a space after a vararg's star or a keyword's star-star.
    return False
  if lval == '@' and format_token.Subtype.DECORATOR in left.subtypes:
    # Decorators shouldn't be separated from the 'at' sign.
    return False
  if ((left.is_keyword and rval == '.') or
      (lval == '.' and right.is_keyword)):
    # Add space between keywords and dots, except after 'None'.
    return lval != 'None'
  if lval == '.' or rval == '.':
    # Don't place spaces between dots.
    return False
  if ((lval == '(' and rval == ')') or (lval == '[' and rval == ']') or
      (lval == '{' and rval == '}')):
    # Empty objects shouldn't be separated by spaces.
    return False
  if (lval in pytree_utils.OPENING_BRACKETS and
      rval in pytree_utils.OPENING_BRACKETS):
    # Nested objects' opening brackets shouldn't be separated.
    return False
  if (lval in pytree_utils.CLOSING_BRACKETS and
      rval in pytree_utils.CLOSING_BRACKETS):
    # Nested objects' closing brackets shouldn't be separated.
    return False
  if lval in pytree_utils.CLOSING_BRACKETS and rval in '([':
    # A call, set, dictionary, or subscript that has a call or subscript after
    # it shouldn't have a space between them.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsIdNumberStringToken(right):
    # Don't separate the opening bracket from the first item.
    return False
  if left.is_name and rval in '([':
    # Don't separate a call or array access from the name.
    return False
  if rval in pytree_utils.CLOSING_BRACKETS:
    # Don't separate the closing bracket from the last item.
    # FIXME(morbo): This might be too permissive.
    return False
  if lval == 'print' and rval == '(':
    # Special support for the 'print' function.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsUnaryOperator(right):
    # Don't separate a unary operator from the opening bracket.
    return False
  if (lval in pytree_utils.OPENING_BRACKETS and
      (format_token.Subtype.VARARGS_STAR in right.subtypes or
       format_token.Subtype.KWARGS_STAR_STAR in right.subtypes)):
    # Don't separate a '*' or '**' from the opening bracket.
    return False
  if rval == ';':
    # Avoid spaces before a semicolon. (Why is there a semicolon?!)
    return False
  if lval == '(' and rval == 'await':
    # Special support for the 'await' keyword. Don't separate the 'await'
    # keyword from an opening paren.
    return False
  return True


def _MustBreakBefore(prev_token, cur_token):
  """Return True if a line break is required before the current token.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token being examined

  Returns:
    True if a break is forced by a preceding comment or by consecutive strings
    inside brackets; otherwise the MUST_SPLIT annotation of the token's node
    (False if the annotation is absent).
  """
  if prev_token.is_comment or (prev_token.previous_token and
                               prev_token.is_pseudo_paren and
                               prev_token.previous_token.is_comment):
    # Must break if the previous token was a comment.
    return True
  if (cur_token.is_string and prev_token.is_string and
      IsSurroundedByBrackets(cur_token)):
    # We want consecutive strings to be on separate lines. This is a
    # reasonable assumption, because otherwise they should have written them
    # all on the same line, or with a '+'.
    return True
  return pytree_utils.GetNodeAnnotation(
      cur_token.node, pytree_utils.Annotation.MUST_SPLIT, default=False)


def _CanBreakBefore(prev_token, cur_token):
  """Return True if a line break may occur before the current token.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token being examined

  Returns:
    False if one of the rules below forbids a break between the two tokens,
    True otherwise.
  """
  pval = prev_token.value
  cval = cur_token.value
  if py3compat.PY3:
    if pval == 'yield' and cval == 'from':
      # Don't break before a yield argument.
      return False
    if pval in {'async', 'await'} and cval in {'def', 'with', 'for'}:
      # Don't break after async keywords.
      return False
  if cur_token.split_penalty >= split_penalty.UNBREAKABLE:
    return False
  if pval == '@':
    # Don't break right after the beginning of a decorator.
    return False
  if cval == ':':
    # Don't break before the start of a block of code.
    return False
  if cval == ',':
    # Don't break before a comma.
    return False
  if prev_token.is_name and cval == '(':
    # Don't break in the middle of a function definition or call.
    return False
  if prev_token.is_name and cval == '[':
    # Don't break in the middle of an array dereference.
    return False
  if prev_token.is_name and cval == '.':
    # Don't break before the '.' in a dotted name.
    return False
  if cur_token.is_comment and prev_token.lineno == cur_token.lineno:
    # Don't break a comment at the end of the line.
    return False
  if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes:
    # Don't break after a unary token.
    return False
  return True


def IsSurroundedByBrackets(tok):
  """Return the bracket token that encloses the given token, if any.

  Walks backwards through the previous_token links, keeping a nesting count
  per bracket kind, until it meets an opening bracket that has not been
  balanced by a closing bracket seen earlier in the walk.

  Arguments:
    tok: the token to start searching from

  Returns:
    The enclosing opening '(', '{' or '[' token, or None if the start of the
    token list is reached without finding one. The result is therefore usable
    as a truth value.
  """
  paren_count = 0
  brace_count = 0
  sq_bracket_count = 0
  previous_token = tok.previous_token
  while previous_token:
    # A closing bracket means a complete bracketed group lies between it and
    # 'tok'; its matching opener must not be mistaken for an enclosing one.
    if previous_token.value == ')':
      paren_count -= 1
    elif previous_token.value == '}':
      brace_count -= 1
    elif previous_token.value == ']':
      sq_bracket_count -= 1

    if previous_token.value == '(':
      if paren_count == 0:
        return previous_token
      paren_count += 1
    elif previous_token.value == '{':
      if brace_count == 0:
        return previous_token
      brace_count += 1
    elif previous_token.value == '[':
      if sq_bracket_count == 0:
        return previous_token
      sq_bracket_count += 1

    previous_token = previous_token.previous_token
  return None


_LOGICAL_OPERATORS = frozenset({'and', 'or'})
_BITWISE_OPERATORS = frozenset({'&', '|', '^'})
_TERM_OPERATORS = frozenset({'*', '/', '%', '//'})


def _SplitPenalty(prev_token, cur_token):
  """Return the penalty for breaking the line before the current token.

  The rules below are evaluated in order and the first one that matches
  decides, so their relative order is significant.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token before which the line would be broken

  Returns:
    A numeric penalty; 0 means that breaking here is free and
    split_penalty.UNBREAKABLE means that the line must not be broken here.
  """
  pval = prev_token.value
  cval = cur_token.value
  if pval == 'not':
    return split_penalty.UNBREAKABLE

  # A penalty already assigned to the token takes precedence over the generic
  # rules below.
  if cur_token.node_split_penalty > 0:
    return cur_token.node_split_penalty

  if style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
    # Prefer to split before 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
    if cval in _LOGICAL_OPERATORS:
      return 0
  else:
    # Prefer to split after 'and' and 'or'.
    if pval in _LOGICAL_OPERATORS:
      return 0
    if cval in _LOGICAL_OPERATORS:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')

  if style.Get('SPLIT_BEFORE_BITWISE_OPERATOR'):
    # Prefer to split before '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
    if cval in _BITWISE_OPERATORS:
      return 0
  else:
    # Prefer to split after '&', '|', and '^'.
    if pval in _BITWISE_OPERATORS:
      return 0
    if cval in _BITWISE_OPERATORS:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')

  if (format_token.Subtype.COMP_FOR in cur_token.subtypes or
      format_token.Subtype.COMP_IF in cur_token.subtypes):
    # We don't mind breaking before the 'for' or 'if' of a list comprehension.
    return 0
  if format_token.Subtype.UNARY_OPERATOR in prev_token.subtypes:
    # Try not to break after a unary operator.
    return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR')
  if pval == ',':
    # Breaking after a comma is fine, if need be.
    return 0
  if prev_token.is_binary_op:
    # We would rather not split after a binary operator.
    return 20
  if (format_token.Subtype.VARARGS_STAR in prev_token.subtypes or
      format_token.Subtype.KWARGS_STAR_STAR in prev_token.subtypes):
    # Don't split after a varargs * or kwargs **.
    return split_penalty.UNBREAKABLE
  if prev_token.OpensScope() and cval != '(':
    # Apply the style's penalty for splitting right after an opening bracket,
    # unless the next token is itself an opening paren.
    return style.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET')
  if cval == ':':
    # Don't split before a colon.
    return split_penalty.UNBREAKABLE
  if cval == '=':
    # Don't split before an assignment.
    return split_penalty.UNBREAKABLE
  if (format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or
      format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes):
    # Don't break before or after a default or named assignment.
    return split_penalty.UNBREAKABLE
  if cval == '==':
    # We would rather not split before an equality operator.
    return split_penalty.STRONGLY_CONNECTED
  if cur_token.ClosesScope():
    # Give a slight penalty for splitting before the closing scope.
    return 100
  if pval in _TERM_OPERATORS or cval in _TERM_OPERATORS:
    # Discourage splitting next to a term operator ('*', '/', '%', '//').
    return 50
  return 0