1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""Implements a format decision state object that manages whitespace decisions. 15 16Each token is processed one at a time, at which point its whitespace formatting 17decisions are made. A graph of potential whitespace formattings is created, 18where each node in the graph is a format decision state object. The heuristic 19tries formatting the token with and without a newline before it to determine 20which one has the least penalty. Therefore, the format decision state object for 21each decision needs to be its own unique copy. 22 23Once the heuristic determines the best formatting, it makes a non-dry run pass 24through the code to commit the whitespace formatting. 25 26 FormatDecisionState: main class exported by this module. 27""" 28 29from yapf.yapflib import format_token 30from yapf.yapflib import object_state 31from yapf.yapflib import split_penalty 32from yapf.yapflib import style 33from yapf.yapflib import unwrapped_line 34 35 36class FormatDecisionState(object): 37 """The current state when indenting an unwrapped line. 38 39 The FormatDecisionState object is meant to be copied instead of referenced. 40 41 Attributes: 42 first_indent: The indent of the first token. 43 column: The number of used columns in the current line. 44 next_token: The next token to be formatted. 45 paren_level: The level of nesting inside (), [], and {}. 46 lowest_level_on_line: The lowest paren_level on the current line. 47 newline: Indicates if a newline is added along the edge to this format 48 decision state node. 49 previous: The previous format decision state in the decision tree. 50 stack: A stack (of _ParenState) keeping track of properties applying to 51 parenthesis levels. 52 comp_stack: A stack (of ComprehensionState) keeping track of properties 53 applying to comprehensions. 54 ignore_stack_for_comparison: Ignore the stack of _ParenState for state 55 comparison. 56 """ 57 58 def __init__(self, line, first_indent): 59 """Initializer. 60 61 Initializes to the state after placing the first token from 'line' at 62 'first_indent'. 63 64 Arguments: 65 line: (UnwrappedLine) The unwrapped line we're currently processing. 66 first_indent: (int) The indent of the first token. 67 """ 68 self.next_token = line.first 69 self.column = first_indent 70 self.line = line 71 self.paren_level = 0 72 self.lowest_level_on_line = 0 73 self.ignore_stack_for_comparison = False 74 self.stack = [_ParenState(first_indent, first_indent)] 75 self.comp_stack = [] 76 self.first_indent = first_indent 77 self.newline = False 78 self.previous = None 79 self.column_limit = style.Get('COLUMN_LIMIT') 80 81 def Clone(self): 82 """Clones a FormatDecisionState object.""" 83 new = FormatDecisionState(self.line, self.first_indent) 84 new.next_token = self.next_token 85 new.column = self.column 86 new.line = self.line 87 new.paren_level = self.paren_level 88 new.line.depth = self.line.depth 89 new.lowest_level_on_line = self.lowest_level_on_line 90 new.ignore_stack_for_comparison = self.ignore_stack_for_comparison 91 new.first_indent = self.first_indent 92 new.newline = self.newline 93 new.previous = self.previous 94 new.stack = [state.Clone() for state in self.stack] 95 new.comp_stack = [state.Clone() for state in self.comp_stack] 96 return new 97 98 def __eq__(self, other): 99 # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous', 100 # because it shouldn't have a bearing on this comparison. (I.e., it will 101 # report equal if 'next_token' does.) 102 return (self.next_token == other.next_token and 103 self.column == other.column and 104 self.paren_level == other.paren_level and 105 self.line.depth == other.line.depth and 106 self.lowest_level_on_line == other.lowest_level_on_line and 107 (self.ignore_stack_for_comparison or 108 other.ignore_stack_for_comparison or 109 self.stack == other.stack and self.comp_stack == other.comp_stack)) 110 111 def __ne__(self, other): 112 return not self == other 113 114 def __hash__(self): 115 return hash((self.next_token, self.column, self.paren_level, 116 self.line.depth, self.lowest_level_on_line)) 117 118 def __repr__(self): 119 return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' % 120 (self.column, repr(self.next_token), self.paren_level, 121 '\n\t'.join(repr(s) for s in self.stack) + ']')) 122 123 def CanSplit(self, must_split): 124 """Determine if we can split before the next token. 125 126 Arguments: 127 must_split: (bool) A newline was required before this token. 128 129 Returns: 130 True if the line can be split before the next token. 131 """ 132 current = self.next_token 133 previous = current.previous_token 134 135 if current.is_pseudo_paren: 136 return False 137 138 if (not must_split and 139 format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes and 140 format_token.Subtype.DICTIONARY_KEY not in current.subtypes and 141 not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')): 142 # In some situations, a dictionary may be multiline, but pylint doesn't 143 # like it. So don't allow it unless forced to. 144 return False 145 146 if (not must_split and 147 format_token.Subtype.DICTIONARY_VALUE in current.subtypes and 148 not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')): 149 return False 150 151 if previous and previous.value == '(' and current.value == ')': 152 # Don't split an empty function call list if we aren't splitting before 153 # dict values. 154 token = previous.previous_token 155 while token: 156 prev = token.previous_token 157 if not prev or prev.name not in {'NAME', 'DOT'}: 158 break 159 token = token.previous_token 160 if token and format_token.Subtype.DICTIONARY_VALUE in token.subtypes: 161 if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'): 162 return False 163 164 if previous and previous.value == '.' and current.value == '.': 165 return False 166 167 return current.can_break_before 168 169 def MustSplit(self): 170 """Returns True if the line must split before the next token.""" 171 current = self.next_token 172 previous = current.previous_token 173 174 if current.is_pseudo_paren: 175 return False 176 177 if current.must_break_before: 178 return True 179 180 if not previous: 181 return False 182 183 if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',': 184 return True 185 186 if (self.stack[-1].split_before_closing_bracket and 187 current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET')): 188 # Split before the closing bracket if we can. 189 return current.node_split_penalty != split_penalty.UNBREAKABLE 190 191 if (current.value == ')' and previous.value == ',' and 192 not _IsSingleElementTuple(current.matching_bracket)): 193 return True 194 195 # Prevent splitting before the first argument in compound statements 196 # with the exception of function declarations. 197 if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and 198 _IsCompoundStatement(self.line.first) and 199 not _IsFunctionDef(self.line.first)): 200 return False 201 202 ########################################################################### 203 # List Splitting 204 if (style.Get('DEDENT_CLOSING_BRACKETS') or 205 style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')): 206 bracket = current if current.ClosesScope() else previous 207 if format_token.Subtype.SUBSCRIPT_BRACKET not in bracket.subtypes: 208 if bracket.OpensScope(): 209 if style.Get('COALESCE_BRACKETS'): 210 if current.OpensScope(): 211 # Prefer to keep all opening brackets together. 212 return False 213 214 if (not _IsLastScopeInLine(bracket) or 215 unwrapped_line.IsSurroundedByBrackets(bracket)): 216 last_token = bracket.matching_bracket 217 else: 218 last_token = _LastTokenInLine(bracket.matching_bracket) 219 220 if not self._FitsOnLine(bracket, last_token): 221 # Split before the first element if the whole list can't fit on a 222 # single line. 223 self.stack[-1].split_before_closing_bracket = True 224 return True 225 226 elif style.Get('DEDENT_CLOSING_BRACKETS') and current.ClosesScope(): 227 # Split before and dedent the closing bracket. 228 return self.stack[-1].split_before_closing_bracket 229 230 if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and 231 current.is_name): 232 # An expression that's surrounded by parens gets split after the opening 233 # parenthesis. 234 def SurroundedByParens(token): 235 """Check if it's an expression surrounded by parentheses.""" 236 while token: 237 if token.value == ',': 238 return False 239 if token.value == ')': 240 return not token.next_token 241 if token.OpensScope(): 242 token = token.matching_bracket.next_token 243 else: 244 token = token.next_token 245 return False 246 247 if (previous.value == '(' and not previous.is_pseudo_paren and 248 not unwrapped_line.IsSurroundedByBrackets(previous)): 249 pptoken = previous.previous_token 250 if (pptoken and not pptoken.is_name and not pptoken.is_keyword and 251 SurroundedByParens(current)): 252 return True 253 254 if (current.is_name or current.is_string) and previous.value == ',': 255 # If the list has function calls in it and the full list itself cannot 256 # fit on the line, then we want to split. Otherwise, we'll get something 257 # like this: 258 # 259 # X = [ 260 # Bar(xxx='some string', 261 # yyy='another long string', 262 # zzz='a third long string'), Bar( 263 # xxx='some string', 264 # yyy='another long string', 265 # zzz='a third long string') 266 # ] 267 # 268 # or when a string formatting syntax. 269 func_call_or_string_format = False 270 tok = current.next_token 271 if current.is_name: 272 while tok and (tok.is_name or tok.value == '.'): 273 tok = tok.next_token 274 func_call_or_string_format = tok and tok.value == '(' 275 elif current.is_string: 276 while tok and tok.is_string: 277 tok = tok.next_token 278 func_call_or_string_format = tok and tok.value == '%' 279 if func_call_or_string_format: 280 open_bracket = unwrapped_line.IsSurroundedByBrackets(current) 281 if open_bracket: 282 if open_bracket.value in '[{': 283 if not self._FitsOnLine(open_bracket, 284 open_bracket.matching_bracket): 285 return True 286 elif tok.value == '(': 287 if not self._FitsOnLine(current, tok.matching_bracket): 288 return True 289 290 ########################################################################### 291 # Dict/Set Splitting 292 if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and 293 format_token.Subtype.DICTIONARY_KEY in current.subtypes and 294 not current.is_comment): 295 # Place each dictionary entry onto its own line. 296 if previous.value == '{' and previous.previous_token: 297 opening = _GetOpeningBracket(previous.previous_token) 298 if (opening and opening.value == '(' and opening.previous_token and 299 opening.previous_token.is_name): 300 # This is a dictionary that's an argument to a function. 301 if (self._FitsOnLine(previous, previous.matching_bracket) and 302 previous.matching_bracket.next_token and 303 (not opening.matching_bracket.next_token or 304 opening.matching_bracket.next_token.value != '.') and 305 _ScopeHasNoCommas(previous)): 306 # Don't split before the key if: 307 # - The dictionary fits on a line, and 308 # - The function call isn't part of a builder-style call and 309 # - The dictionary has one entry and no trailing comma 310 return False 311 return True 312 313 if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and 314 format_token.Subtype.DICT_SET_GENERATOR in current.subtypes): 315 # Split before a dict/set generator. 316 return True 317 318 if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or 319 (previous.is_pseudo_paren and previous.value == '(' and 320 not current.is_comment)): 321 # Split before the dictionary value if we can't fit every dictionary 322 # entry on its own line. 323 if not current.OpensScope(): 324 opening = _GetOpeningBracket(current) 325 if not self._EachDictEntryFitsOnOneLine(opening): 326 return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE') 327 328 if previous.value == '{': 329 # Split if the dict/set cannot fit on one line and ends in a comma. 330 closing = previous.matching_bracket 331 if (not self._FitsOnLine(previous, closing) and 332 closing.previous_token.value == ','): 333 self.stack[-1].split_before_closing_bracket = True 334 return True 335 336 ########################################################################### 337 # Argument List Splitting 338 if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and 339 format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in 340 current.subtypes): 341 if (previous.value not in {'=', ':', '*', '**'} and 342 current.value not in ':=,)' and not _IsFunctionDefinition(previous)): 343 # If we're going to split the lines because of named arguments, then we 344 # want to split after the opening bracket as well. But not when this is 345 # part of a function definition. 346 if previous.value == '(': 347 # Make sure we don't split after the opening bracket if the 348 # continuation indent is greater than the opening bracket: 349 # 350 # a( 351 # b=1, 352 # c=2) 353 if (self._FitsOnLine(previous, previous.matching_bracket) and 354 unwrapped_line.IsSurroundedByBrackets(previous)): 355 # An argument to a function is a function call with named 356 # assigns. 357 return False 358 359 column = self.column - self.stack[-1].last_space 360 return column > style.Get('CONTINUATION_INDENT_WIDTH') 361 362 opening = _GetOpeningBracket(current) 363 if opening: 364 arglist_length = ( 365 opening.matching_bracket.total_length - opening.total_length + 366 self.stack[-1].indent) 367 return arglist_length > self.column_limit 368 369 if (current.value not in '{)' and previous.value == '(' and 370 self._ArgumentListHasDictionaryEntry(current)): 371 return True 372 373 if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'): 374 # Split before arguments in a function call or definition if the 375 # arguments are terminated by a comma. 376 opening = _GetOpeningBracket(current) 377 if opening and opening.previous_token and opening.previous_token.is_name: 378 if previous.value in '(,': 379 if opening.matching_bracket.previous_token.value == ',': 380 return True 381 382 if ((current.is_name or current.value in {'*', '**'}) and 383 previous.value == ','): 384 # If we have a function call within an argument list and it won't fit on 385 # the remaining line, but it will fit on a line by itself, then go ahead 386 # and split before the call. 387 opening = _GetOpeningBracket(current) 388 if (opening and opening.value == '(' and opening.previous_token and 389 (opening.previous_token.is_name or 390 opening.previous_token.value in {'*', '**'})): 391 is_func_call = False 392 opening = current 393 while opening: 394 if opening.value == '(': 395 is_func_call = True 396 break 397 if (not (opening.is_name or opening.value in {'*', '**'}) and 398 opening.value != '.'): 399 break 400 opening = opening.next_token 401 402 if is_func_call: 403 if (not self._FitsOnLine(current, opening.matching_bracket) or 404 (opening.matching_bracket.next_token and 405 opening.matching_bracket.next_token.value != ',' and 406 not opening.matching_bracket.next_token.ClosesScope())): 407 return True 408 409 pprevious = previous.previous_token 410 if (current.is_name and pprevious and pprevious.is_name and 411 previous.value == '('): 412 if (not self._FitsOnLine(previous, previous.matching_bracket) and 413 _IsFunctionCallWithArguments(current)): 414 # There is a function call, with more than 1 argument, where the first 415 # argument is itself a function call with arguments. In this specific 416 # case, if we split after the first argument's opening '(', then the 417 # formatting will look bad for the rest of the arguments. E.g.: 418 # 419 # outer_function_call(inner_function_call( 420 # inner_arg1, inner_arg2), 421 # outer_arg1, outer_arg2) 422 # 423 # Instead, enforce a split before that argument to keep things looking 424 # good. 425 return True 426 427 if (previous.OpensScope() and not current.OpensScope() and 428 not current.is_comment and 429 format_token.Subtype.SUBSCRIPT_BRACKET not in previous.subtypes): 430 if pprevious and not pprevious.is_keyword and not pprevious.is_name: 431 # We want to split if there's a comment in the container. 432 token = current 433 while token != previous.matching_bracket: 434 if token.is_comment: 435 return True 436 token = token.next_token 437 if previous.value == '(': 438 pptoken = previous.previous_token 439 if not pptoken or not pptoken.is_name: 440 # Split after the opening of a tuple if it doesn't fit on the current 441 # line and it's not a function call. 442 if self._FitsOnLine(previous, previous.matching_bracket): 443 return False 444 elif not self._FitsOnLine(previous, previous.matching_bracket): 445 if len(previous.container_elements) == 1: 446 return False 447 448 elements = previous.container_elements + [previous.matching_bracket] 449 i = 1 450 while i < len(elements): 451 if (not elements[i - 1].OpensScope() and 452 not self._FitsOnLine(elements[i - 1], elements[i])): 453 return True 454 i += 1 455 456 if (self.column_limit - self.column) / float(self.column_limit) < 0.3: 457 # Try not to squish all of the arguments off to the right. 458 return True 459 else: 460 # Split after the opening of a container if it doesn't fit on the 461 # current line. 462 if not self._FitsOnLine(previous, previous.matching_bracket): 463 return True 464 465 ########################################################################### 466 # Original Formatting Splitting 467 # These checks rely upon the original formatting. This is in order to 468 # attempt to keep hand-written code in the same condition as it was before. 469 # However, this may cause the formatter to fail to be idempotent. 470 if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and 471 previous.lineno < current.lineno): 472 # Retain the split before a bitwise operator. 473 return True 474 475 if (current.is_comment and 476 previous.lineno < current.lineno - current.value.count('\n')): 477 # If a comment comes in the middle of an unwrapped line (like an if 478 # conditional with comments interspersed), then we want to split if the 479 # original comments were on a separate line. 480 return True 481 482 return False 483 484 def AddTokenToState(self, newline, dry_run, must_split=False): 485 """Add a token to the format decision state. 486 487 Allow the heuristic to try out adding the token with and without a newline. 488 Later on, the algorithm will determine which one has the lowest penalty. 489 490 Arguments: 491 newline: (bool) Add the token on a new line if True. 492 dry_run: (bool) Don't commit whitespace changes to the FormatToken if 493 True. 494 must_split: (bool) A newline was required before this token. 495 496 Returns: 497 The penalty of splitting after the current token. 498 """ 499 penalty = 0 500 if newline: 501 penalty = self._AddTokenOnNewline(dry_run, must_split) 502 else: 503 self._AddTokenOnCurrentLine(dry_run) 504 505 penalty += self._CalculateComprehensionState(newline) 506 507 return self.MoveStateToNextToken() + penalty 508 509 def _AddTokenOnCurrentLine(self, dry_run): 510 """Puts the token on the current line. 511 512 Appends the next token to the state and updates information necessary for 513 indentation. 514 515 Arguments: 516 dry_run: (bool) Commit whitespace changes to the FormatToken if True. 517 """ 518 current = self.next_token 519 previous = current.previous_token 520 521 spaces = current.spaces_required_before 522 if not dry_run: 523 current.AddWhitespacePrefix(newlines_before=0, spaces=spaces) 524 525 if previous.OpensScope(): 526 if not current.is_comment: 527 # Align closing scopes that are on a newline with the opening scope: 528 # 529 # foo = [a, 530 # b, 531 # ] 532 self.stack[-1].closing_scope_indent = self.column - 1 533 if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'): 534 self.stack[-1].closing_scope_indent += 1 535 self.stack[-1].indent = self.column + spaces 536 else: 537 self.stack[-1].closing_scope_indent = ( 538 self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) 539 540 self.column += spaces 541 542 def _AddTokenOnNewline(self, dry_run, must_split): 543 """Adds a line break and necessary indentation. 544 545 Appends the next token to the state and updates information necessary for 546 indentation. 547 548 Arguments: 549 dry_run: (bool) Don't commit whitespace changes to the FormatToken if 550 True. 551 must_split: (bool) A newline was required before this token. 552 553 Returns: 554 The split penalty for splitting after the current state. 555 """ 556 current = self.next_token 557 previous = current.previous_token 558 559 self.column = self._GetNewlineColumn() 560 561 if not dry_run: 562 indent_level = self.line.depth 563 spaces = self.column 564 if spaces: 565 spaces -= indent_level * style.Get('INDENT_WIDTH') 566 current.AddWhitespacePrefix( 567 newlines_before=1, spaces=spaces, indent_level=indent_level) 568 569 if not current.is_comment: 570 self.stack[-1].last_space = self.column 571 self.lowest_level_on_line = self.paren_level 572 573 if (previous.OpensScope() or 574 (previous.is_comment and previous.previous_token is not None and 575 previous.previous_token.OpensScope())): 576 self.stack[-1].closing_scope_indent = max( 577 0, self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH')) 578 579 self.stack[-1].split_before_closing_bracket = True 580 581 # Calculate the split penalty. 582 penalty = current.split_penalty 583 584 if must_split: 585 # Don't penalize for a must split. 586 return penalty 587 588 if previous.is_pseudo_paren and previous.value == '(': 589 # Small penalty for splitting after a pseudo paren. 590 penalty += 50 591 592 # Add a penalty for each increasing newline we add, but don't penalize for 593 # splitting before an if-expression or list comprehension. 594 if current.value not in {'if', 'for'}: 595 last = self.stack[-1] 596 last.num_line_splits += 1 597 penalty += ( 598 style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') * 599 last.num_line_splits) 600 601 if current.OpensScope() and previous.OpensScope(): 602 # Prefer to keep opening brackets coalesced (unless it's at the beginning 603 # of a function call). 604 pprev = previous.previous_token 605 if not pprev or not pprev.is_name: 606 penalty += 10 607 608 return penalty + 10 609 610 def MoveStateToNextToken(self): 611 """Calculate format decision state information and move onto the next token. 612 613 Before moving onto the next token, we first calculate the format decision 614 state given the current token and its formatting decisions. Then the format 615 decision state is set up so that the next token can be added. 616 617 Returns: 618 The penalty for the number of characters over the column limit. 619 """ 620 current = self.next_token 621 if not current.OpensScope() and not current.ClosesScope(): 622 self.lowest_level_on_line = min(self.lowest_level_on_line, 623 self.paren_level) 624 625 # If we encounter an opening bracket, we add a level to our stack to prepare 626 # for the subsequent tokens. 627 if current.OpensScope(): 628 last = self.stack[-1] 629 new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space 630 631 self.stack.append(_ParenState(new_indent, self.stack[-1].last_space)) 632 self.paren_level += 1 633 634 # If we encounter a closing bracket, we can remove a level from our 635 # parenthesis stack. 636 if len(self.stack) > 1 and current.ClosesScope(): 637 if format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes: 638 self.stack[-2].last_space = self.stack[-2].indent 639 else: 640 self.stack[-2].last_space = self.stack[-1].last_space 641 self.stack.pop() 642 self.paren_level -= 1 643 644 is_multiline_string = current.is_string and '\n' in current.value 645 if is_multiline_string: 646 # This is a multiline string. Only look at the first line. 647 self.column += len(current.value.split('\n')[0]) 648 elif not current.is_pseudo_paren: 649 self.column += len(current.value) 650 651 self.next_token = self.next_token.next_token 652 653 # Calculate the penalty for overflowing the column limit. 654 penalty = 0 655 if (not current.is_pylint_comment and not current.is_pytype_comment and 656 self.column > self.column_limit): 657 excess_characters = self.column - self.column_limit 658 penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters 659 660 if is_multiline_string: 661 # If this is a multiline string, the column is actually the 662 # end of the last line in the string. 663 self.column = len(current.value.split('\n')[-1]) 664 665 return penalty 666 667 def _CalculateComprehensionState(self, newline): 668 """Makes required changes to comprehension state. 669 670 Args: 671 newline: Whether the current token is to be added on a newline. 672 673 Returns: 674 The penalty for the token-newline combination given the current 675 comprehension state. 676 """ 677 current = self.next_token 678 previous = current.previous_token 679 top_of_stack = self.comp_stack[-1] if self.comp_stack else None 680 penalty = 0 681 682 if top_of_stack is not None: 683 # Check if the token terminates the current comprehension. 684 if current == top_of_stack.closing_bracket: 685 last = self.comp_stack.pop() 686 # Lightly penalize comprehensions that are split across multiple lines. 687 if last.has_interior_split: 688 penalty += style.Get('SPLIT_PENALTY_COMPREHENSION') 689 690 return penalty 691 692 if newline: 693 top_of_stack.has_interior_split = True 694 695 if (format_token.Subtype.COMP_EXPR in current.subtypes and 696 format_token.Subtype.COMP_EXPR not in previous.subtypes): 697 self.comp_stack.append(object_state.ComprehensionState(current)) 698 return penalty 699 700 if (current.value == 'for' and 701 format_token.Subtype.COMP_FOR in current.subtypes): 702 if top_of_stack.for_token is not None: 703 # Treat nested comprehensions like normal comp_if expressions. 704 # Example: 705 # my_comp = [ 706 # a.qux + b.qux 707 # for a in foo 708 # --> for b in bar <-- 709 # if a.zut + b.zut 710 # ] 711 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and 712 top_of_stack.has_split_at_for != newline and 713 (top_of_stack.has_split_at_for or 714 not top_of_stack.HasTrivialExpr())): 715 penalty += split_penalty.UNBREAKABLE 716 else: 717 top_of_stack.for_token = current 718 top_of_stack.has_split_at_for = newline 719 720 # Try to keep trivial expressions on the same line as the comp_for. 721 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and 722 top_of_stack.HasTrivialExpr()): 723 penalty += split_penalty.CONNECTED 724 725 if (format_token.Subtype.COMP_IF in current.subtypes and 726 format_token.Subtype.COMP_IF not in previous.subtypes): 727 # Penalize breaking at comp_if when it doesn't match the newline structure 728 # in the rest of the comprehension. 729 if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and 730 top_of_stack.has_split_at_for != newline and 731 (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())): 732 penalty += split_penalty.UNBREAKABLE 733 734 return penalty 735 736 def _GetNewlineColumn(self): 737 """Return the new column on the newline.""" 738 current = self.next_token 739 previous = current.previous_token 740 top_of_stack = self.stack[-1] 741 742 if current.spaces_required_before > 2 or self.line.disable: 743 return current.spaces_required_before 744 745 if current.OpensScope(): 746 return top_of_stack.indent if self.paren_level else self.first_indent 747 748 if current.ClosesScope(): 749 if (previous.OpensScope() or 750 (previous.is_comment and previous.previous_token is not None and 751 previous.previous_token.OpensScope())): 752 return max(0, 753 top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH')) 754 return top_of_stack.closing_scope_indent 755 756 if (previous and previous.is_string and current.is_string and 757 format_token.Subtype.DICTIONARY_VALUE in current.subtypes): 758 return previous.column 759 760 if style.Get('INDENT_DICTIONARY_VALUE'): 761 if previous and (previous.value == ':' or previous.is_pseudo_paren): 762 if format_token.Subtype.DICTIONARY_VALUE in current.subtypes: 763 return top_of_stack.indent 764 765 if (_IsCompoundStatement(self.line.first) and 766 (not style.Get('DEDENT_CLOSING_BRACKETS') or 767 style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))): 768 token_indent = ( 769 len(self.line.first.whitespace_prefix.split('\n')[-1]) + 770 style.Get('INDENT_WIDTH')) 771 if token_indent == top_of_stack.indent: 772 return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH') 773 774 return top_of_stack.indent 775 776 def _FitsOnLine(self, start, end): 777 """Determines if line between start and end can fit on the current line.""" 778 length = end.total_length - start.total_length 779 if not start.is_pseudo_paren: 780 length += len(start.value) 781 return length + self.column <= self.column_limit 782 783 def _EachDictEntryFitsOnOneLine(self, opening): 784 """Determine if each dict elems can fit on one line.""" 785 786 def PreviousNonCommentToken(tok): 787 tok = tok.previous_token 788 while tok.is_comment: 789 tok = tok.previous_token 790 return tok 791 792 def ImplicitStringConcatenation(tok): 793 num_strings = 0 794 if tok.is_pseudo_paren: 795 tok = tok.next_token 796 while tok.is_string: 797 num_strings += 1 798 tok = tok.next_token 799 return num_strings > 1 800 801 closing = opening.matching_bracket 802 entry_start = opening.next_token 803 current = opening.next_token.next_token 804 805 while current and current != closing: 806 if format_token.Subtype.DICTIONARY_KEY in current.subtypes: 807 prev = PreviousNonCommentToken(current) 808 length = prev.total_length - entry_start.total_length 809 length += len(entry_start.value) 810 if length + self.stack[-2].indent >= self.column_limit: 811 return False 812 entry_start = current 813 if current.OpensScope(): 814 if ((current.value == '{' or 815 (current.is_pseudo_paren and current.next_token.value == '{') and 816 format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or 817 ImplicitStringConcatenation(current)): 818 # A dictionary entry that cannot fit on a single line shouldn't matter 819 # to this calculation. If it can't fit on a single line, then the 820 # opening should be on the same line as the key and the rest on 821 # newlines after it. But the other entries should be on single lines 822 # if possible. 823 if current.matching_bracket: 824 current = current.matching_bracket 825 while current: 826 if current == closing: 827 return True 828 if format_token.Subtype.DICTIONARY_KEY in current.subtypes: 829 entry_start = current 830 break 831 current = current.next_token 832 else: 833 current = current.matching_bracket 834 else: 835 current = current.next_token 836 837 # At this point, current is the closing bracket. Go back one to get the the 838 # end of the dictionary entry. 839 current = PreviousNonCommentToken(current) 840 length = current.total_length - entry_start.total_length 841 length += len(entry_start.value) 842 return length + self.stack[-2].indent <= self.column_limit 843 844 def _ArgumentListHasDictionaryEntry(self, token): 845 """Check if the function argument list has a dictionary as an arg.""" 846 if _IsArgumentToFunction(token): 847 while token: 848 if token.value == '{': 849 length = token.matching_bracket.total_length - token.total_length 850 return length + self.stack[-2].indent > self.column_limit 851 if token.ClosesScope(): 852 break 853 if token.OpensScope(): 854 token = token.matching_bracket 855 token = token.next_token 856 return False 857 858 859_COMPOUND_STMTS = frozenset( 860 {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'}) 861 862 863def _IsCompoundStatement(token): 864 if token.value == 'async': 865 token = token.next_token 866 return token.value in _COMPOUND_STMTS 867 868 869def _IsFunctionDef(token): 870 if token.value == 'async': 871 token = token.next_token 872 return token.value == 'def' 873 874 875def _IsFunctionCallWithArguments(token): 876 while token: 877 if token.value == '(': 878 token = token.next_token 879 return token and token.value != ')' 880 elif token.name not in {'NAME', 'DOT', 'EQUAL'}: 881 break 882 token = token.next_token 883 return False 884 885 886def _IsArgumentToFunction(token): 887 bracket = unwrapped_line.IsSurroundedByBrackets(token) 888 if not bracket or bracket.value != '(': 889 return False 890 previous = bracket.previous_token 891 return previous and previous.is_name 892 893 894def _GetLengthOfSubtype(token, subtype, exclude=None): 895 current = token 896 while (current.next_token and subtype in current.subtypes and 897 (exclude is None or exclude not in current.subtypes)): 898 current = current.next_token 899 return current.total_length - token.total_length + 1 900 901 902def _GetOpeningBracket(current): 903 """Get the opening bracket containing the current token.""" 904 if current.matching_bracket and not current.is_pseudo_paren: 905 return current.matching_bracket 906 while current: 907 if current.ClosesScope(): 908 current = current.matching_bracket 909 elif current.is_pseudo_paren: 910 current = current.previous_token 911 elif current.OpensScope(): 912 return current 913 current = current.previous_token 914 return None 915 916 917def _LastTokenInLine(current): 918 while not current.is_comment and current.next_token: 919 current = current.next_token 920 return current 921 922 923def _IsFunctionDefinition(current): 924 prev = current.previous_token 925 return (current.value == '(' and prev and 926 format_token.Subtype.FUNC_DEF in prev.subtypes) 927 928 929def _IsLastScopeInLine(current): 930 while current: 931 current = current.next_token 932 if current and current.OpensScope(): 933 return False 934 return True 935 936 937def _IsSingleElementTuple(token): 938 """Check if it's a single-element tuple.""" 939 close = token.matching_bracket 940 token = token.next_token 941 num_commas = 0 942 while token != close: 943 if token.value == ',': 944 num_commas += 1 945 if token.OpensScope(): 946 token = token.matching_bracket 947 else: 948 token = token.next_token 949 return num_commas == 1 950 951 952def _ScopeHasNoCommas(token): 953 """Check if the scope has no commas.""" 954 close = token.matching_bracket 955 token = token.next_token 956 while token != close: 957 if token.value == ',': 958 return False 959 if token.OpensScope(): 960 token = token.matching_bracket 961 else: 962 token = token.next_token 963 return True 964 965 966class _ParenState(object): 967 """Maintains the state of the bracket enclosures. 968 969 A stack of _ParenState objects are kept so that we know how to indent relative 970 to the brackets. 971 972 Attributes: 973 indent: The column position to which a specified parenthesis level needs to 974 be indented. 975 last_space: The column position of the last space on each level. 976 split_before_closing_bracket: Whether a newline needs to be inserted before 977 the closing bracket. We only want to insert a newline before the closing 978 bracket if there also was a newline after the beginning left bracket. 979 num_line_splits: Number of line splits this _ParenState contains already. 980 Each subsequent line split gets an increasing penalty. 981 """ 982 983 # TODO(morbo): This doesn't track "bin packing." 984 985 def __init__(self, indent, last_space): 986 self.indent = indent 987 self.last_space = last_space 988 self.closing_scope_indent = 0 989 self.split_before_closing_bracket = False 990 self.num_line_splits = 0 991 992 def Clone(self): 993 state = _ParenState(self.indent, self.last_space) 994 state.closing_scope_indent = self.closing_scope_indent 995 state.split_before_closing_bracket = self.split_before_closing_bracket 996 state.num_line_splits = self.num_line_splits 997 return state 998 999 def __repr__(self): 1000 return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % ( 1001 self.indent, self.last_space, self.closing_scope_indent) 1002 1003 def __eq__(self, other): 1004 return hash(self) == hash(other) 1005 1006 def __ne__(self, other): 1007 return not self == other 1008 1009 def __hash__(self, *args, **kwargs): 1010 return hash((self.indent, self.last_space, self.closing_scope_indent, 1011 self.split_before_closing_bracket, self.num_line_splits)) 1012