1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Implements a format decision state object that manages whitespace decisions.
15
16Each token is processed one at a time, at which point its whitespace formatting
17decisions are made. A graph of potential whitespace formattings is created,
18where each node in the graph is a format decision state object. The heuristic
19tries formatting the token with and without a newline before it to determine
20which one has the least penalty. Therefore, the format decision state object for
21each decision needs to be its own unique copy.
22
23Once the heuristic determines the best formatting, it makes a non-dry run pass
24through the code to commit the whitespace formatting.
25
26  FormatDecisionState: main class exported by this module.
27"""
28
29from yapf.yapflib import format_token
30from yapf.yapflib import object_state
31from yapf.yapflib import split_penalty
32from yapf.yapflib import style
33from yapf.yapflib import unwrapped_line
34
35
36class FormatDecisionState(object):
37  """The current state when indenting an unwrapped line.
38
39  The FormatDecisionState object is meant to be copied instead of referenced.
40
41  Attributes:
42    first_indent: The indent of the first token.
43    column: The number of used columns in the current line.
44    next_token: The next token to be formatted.
45    paren_level: The level of nesting inside (), [], and {}.
46    lowest_level_on_line: The lowest paren_level on the current line.
47    newline: Indicates if a newline is added along the edge to this format
48      decision state node.
49    previous: The previous format decision state in the decision tree.
50    stack: A stack (of _ParenState) keeping track of properties applying to
51      parenthesis levels.
52    comp_stack: A stack (of ComprehensionState) keeping track of properties
53      applying to comprehensions.
54    ignore_stack_for_comparison: Ignore the stack of _ParenState for state
55      comparison.
56  """
57
58  def __init__(self, line, first_indent):
59    """Initializer.
60
61    Initializes to the state after placing the first token from 'line' at
62    'first_indent'.
63
64    Arguments:
65      line: (UnwrappedLine) The unwrapped line we're currently processing.
66      first_indent: (int) The indent of the first token.
67    """
68    self.next_token = line.first
69    self.column = first_indent
70    self.line = line
71    self.paren_level = 0
72    self.lowest_level_on_line = 0
73    self.ignore_stack_for_comparison = False
74    self.stack = [_ParenState(first_indent, first_indent)]
75    self.comp_stack = []
76    self.first_indent = first_indent
77    self.newline = False
78    self.previous = None
79    self.column_limit = style.Get('COLUMN_LIMIT')
80
81  def Clone(self):
82    """Clones a FormatDecisionState object."""
83    new = FormatDecisionState(self.line, self.first_indent)
84    new.next_token = self.next_token
85    new.column = self.column
86    new.line = self.line
87    new.paren_level = self.paren_level
88    new.line.depth = self.line.depth
89    new.lowest_level_on_line = self.lowest_level_on_line
90    new.ignore_stack_for_comparison = self.ignore_stack_for_comparison
91    new.first_indent = self.first_indent
92    new.newline = self.newline
93    new.previous = self.previous
94    new.stack = [state.Clone() for state in self.stack]
95    new.comp_stack = [state.Clone() for state in self.comp_stack]
96    return new
97
98  def __eq__(self, other):
99    # Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous',
100    # because it shouldn't have a bearing on this comparison. (I.e., it will
101    # report equal if 'next_token' does.)
102    return (self.next_token == other.next_token and
103            self.column == other.column and
104            self.paren_level == other.paren_level and
105            self.line.depth == other.line.depth and
106            self.lowest_level_on_line == other.lowest_level_on_line and
107            (self.ignore_stack_for_comparison or
108             other.ignore_stack_for_comparison or
109             self.stack == other.stack and self.comp_stack == other.comp_stack))
110
111  def __ne__(self, other):
112    return not self == other
113
114  def __hash__(self):
115    return hash((self.next_token, self.column, self.paren_level,
116                 self.line.depth, self.lowest_level_on_line))
117
118  def __repr__(self):
119    return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' %
120            (self.column, repr(self.next_token), self.paren_level,
121             '\n\t'.join(repr(s) for s in self.stack) + ']'))
122
123  def CanSplit(self, must_split):
124    """Determine if we can split before the next token.
125
126    Arguments:
127      must_split: (bool) A newline was required before this token.
128
129    Returns:
130      True if the line can be split before the next token.
131    """
132    current = self.next_token
133    previous = current.previous_token
134
135    if current.is_pseudo_paren:
136      return False
137
138    if (not must_split and
139        format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes and
140        format_token.Subtype.DICTIONARY_KEY not in current.subtypes and
141        not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')):
142      # In some situations, a dictionary may be multiline, but pylint doesn't
143      # like it. So don't allow it unless forced to.
144      return False
145
146    if (not must_split and
147        format_token.Subtype.DICTIONARY_VALUE in current.subtypes and
148        not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')):
149      return False
150
151    if previous and previous.value == '(' and current.value == ')':
152      # Don't split an empty function call list if we aren't splitting before
153      # dict values.
154      token = previous.previous_token
155      while token:
156        prev = token.previous_token
157        if not prev or prev.name not in {'NAME', 'DOT'}:
158          break
159        token = token.previous_token
160      if token and format_token.Subtype.DICTIONARY_VALUE in token.subtypes:
161        if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'):
162          return False
163
164    if previous and previous.value == '.' and current.value == '.':
165      return False
166
167    return current.can_break_before
168
169  def MustSplit(self):
170    """Returns True if the line must split before the next token."""
171    current = self.next_token
172    previous = current.previous_token
173
174    if current.is_pseudo_paren:
175      return False
176
177    if current.must_break_before:
178      return True
179
180    if not previous:
181      return False
182
183    if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',':
184      return True
185
186    if (self.stack[-1].split_before_closing_bracket and
187        current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET')):
188      # Split before the closing bracket if we can.
189      return current.node_split_penalty != split_penalty.UNBREAKABLE
190
191    if (current.value == ')' and previous.value == ',' and
192        not _IsSingleElementTuple(current.matching_bracket)):
193      return True
194
195    # Prevent splitting before the first argument in compound statements
196    # with the exception of function declarations.
197    if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and
198        _IsCompoundStatement(self.line.first) and
199        not _IsFunctionDef(self.line.first)):
200      return False
201
202    ###########################################################################
203    # List Splitting
204    if (style.Get('DEDENT_CLOSING_BRACKETS') or
205        style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
206      bracket = current if current.ClosesScope() else previous
207      if format_token.Subtype.SUBSCRIPT_BRACKET not in bracket.subtypes:
208        if bracket.OpensScope():
209          if style.Get('COALESCE_BRACKETS'):
210            if current.OpensScope():
211              # Prefer to keep all opening brackets together.
212              return False
213
214          if (not _IsLastScopeInLine(bracket) or
215              unwrapped_line.IsSurroundedByBrackets(bracket)):
216            last_token = bracket.matching_bracket
217          else:
218            last_token = _LastTokenInLine(bracket.matching_bracket)
219
220          if not self._FitsOnLine(bracket, last_token):
221            # Split before the first element if the whole list can't fit on a
222            # single line.
223            self.stack[-1].split_before_closing_bracket = True
224            return True
225
226        elif style.Get('DEDENT_CLOSING_BRACKETS') and current.ClosesScope():
227          # Split before and dedent the closing bracket.
228          return self.stack[-1].split_before_closing_bracket
229
230    if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
231        current.is_name):
232      # An expression that's surrounded by parens gets split after the opening
233      # parenthesis.
234      def SurroundedByParens(token):
235        """Check if it's an expression surrounded by parentheses."""
236        while token:
237          if token.value == ',':
238            return False
239          if token.value == ')':
240            return not token.next_token
241          if token.OpensScope():
242            token = token.matching_bracket.next_token
243          else:
244            token = token.next_token
245        return False
246
247      if (previous.value == '(' and not previous.is_pseudo_paren and
248          not unwrapped_line.IsSurroundedByBrackets(previous)):
249        pptoken = previous.previous_token
250        if (pptoken and not pptoken.is_name and not pptoken.is_keyword and
251            SurroundedByParens(current)):
252          return True
253
254    if (current.is_name or current.is_string) and previous.value == ',':
255      # If the list has function calls in it and the full list itself cannot
256      # fit on the line, then we want to split. Otherwise, we'll get something
257      # like this:
258      #
259      #     X = [
260      #         Bar(xxx='some string',
261      #             yyy='another long string',
262      #             zzz='a third long string'), Bar(
263      #                 xxx='some string',
264      #                 yyy='another long string',
265      #                 zzz='a third long string')
266      #     ]
267      #
268      # or when a string formatting syntax.
269      func_call_or_string_format = False
270      tok = current.next_token
271      if current.is_name:
272        while tok and (tok.is_name or tok.value == '.'):
273          tok = tok.next_token
274        func_call_or_string_format = tok and tok.value == '('
275      elif current.is_string:
276        while tok and tok.is_string:
277          tok = tok.next_token
278        func_call_or_string_format = tok and tok.value == '%'
279      if func_call_or_string_format:
280        open_bracket = unwrapped_line.IsSurroundedByBrackets(current)
281        if open_bracket:
282          if open_bracket.value in '[{':
283            if not self._FitsOnLine(open_bracket,
284                                    open_bracket.matching_bracket):
285              return True
286          elif tok.value == '(':
287            if not self._FitsOnLine(current, tok.matching_bracket):
288              return True
289
290    ###########################################################################
291    # Dict/Set Splitting
292    if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
293        format_token.Subtype.DICTIONARY_KEY in current.subtypes and
294        not current.is_comment):
295      # Place each dictionary entry onto its own line.
296      if previous.value == '{' and previous.previous_token:
297        opening = _GetOpeningBracket(previous.previous_token)
298        if (opening and opening.value == '(' and opening.previous_token and
299            opening.previous_token.is_name):
300          # This is a dictionary that's an argument to a function.
301          if (self._FitsOnLine(previous, previous.matching_bracket) and
302              previous.matching_bracket.next_token and
303              (not opening.matching_bracket.next_token or
304               opening.matching_bracket.next_token.value != '.') and
305              _ScopeHasNoCommas(previous)):
306            # Don't split before the key if:
307            #   - The dictionary fits on a line, and
308            #   - The function call isn't part of a builder-style call and
309            #   - The dictionary has one entry and no trailing comma
310            return False
311      return True
312
313    if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and
314        format_token.Subtype.DICT_SET_GENERATOR in current.subtypes):
315      # Split before a dict/set generator.
316      return True
317
318    if (format_token.Subtype.DICTIONARY_VALUE in current.subtypes or
319        (previous.is_pseudo_paren and previous.value == '(' and
320         not current.is_comment)):
321      # Split before the dictionary value if we can't fit every dictionary
322      # entry on its own line.
323      if not current.OpensScope():
324        opening = _GetOpeningBracket(current)
325        if not self._EachDictEntryFitsOnOneLine(opening):
326          return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')
327
328    if previous.value == '{':
329      # Split if the dict/set cannot fit on one line and ends in a comma.
330      closing = previous.matching_bracket
331      if (not self._FitsOnLine(previous, closing) and
332          closing.previous_token.value == ','):
333        self.stack[-1].split_before_closing_bracket = True
334        return True
335
336    ###########################################################################
337    # Argument List Splitting
338    if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and
339        format_token.Subtype.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in
340        current.subtypes):
341      if (previous.value not in {'=', ':', '*', '**'} and
342          current.value not in ':=,)' and not _IsFunctionDefinition(previous)):
343        # If we're going to split the lines because of named arguments, then we
344        # want to split after the opening bracket as well. But not when this is
345        # part of a function definition.
346        if previous.value == '(':
347          # Make sure we don't split after the opening bracket if the
348          # continuation indent is greater than the opening bracket:
349          #
350          #  a(
351          #      b=1,
352          #      c=2)
353          if (self._FitsOnLine(previous, previous.matching_bracket) and
354              unwrapped_line.IsSurroundedByBrackets(previous)):
355            # An argument to a function is a function call with named
356            # assigns.
357            return False
358
359          column = self.column - self.stack[-1].last_space
360          return column > style.Get('CONTINUATION_INDENT_WIDTH')
361
362        opening = _GetOpeningBracket(current)
363        if opening:
364          arglist_length = (
365              opening.matching_bracket.total_length - opening.total_length +
366              self.stack[-1].indent)
367          return arglist_length > self.column_limit
368
369    if (current.value not in '{)' and previous.value == '(' and
370        self._ArgumentListHasDictionaryEntry(current)):
371      return True
372
373    if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
374      # Split before arguments in a function call or definition if the
375      # arguments are terminated by a comma.
376      opening = _GetOpeningBracket(current)
377      if opening and opening.previous_token and opening.previous_token.is_name:
378        if previous.value in '(,':
379          if opening.matching_bracket.previous_token.value == ',':
380            return True
381
382    if ((current.is_name or current.value in {'*', '**'}) and
383        previous.value == ','):
384      # If we have a function call within an argument list and it won't fit on
385      # the remaining line, but it will fit on a line by itself, then go ahead
386      # and split before the call.
387      opening = _GetOpeningBracket(current)
388      if (opening and opening.value == '(' and opening.previous_token and
389          (opening.previous_token.is_name or
390           opening.previous_token.value in {'*', '**'})):
391        is_func_call = False
392        opening = current
393        while opening:
394          if opening.value == '(':
395            is_func_call = True
396            break
397          if (not (opening.is_name or opening.value in {'*', '**'}) and
398              opening.value != '.'):
399            break
400          opening = opening.next_token
401
402        if is_func_call:
403          if (not self._FitsOnLine(current, opening.matching_bracket) or
404              (opening.matching_bracket.next_token and
405               opening.matching_bracket.next_token.value != ',' and
406               not opening.matching_bracket.next_token.ClosesScope())):
407            return True
408
409    pprevious = previous.previous_token
410    if (current.is_name and pprevious and pprevious.is_name and
411        previous.value == '('):
412      if (not self._FitsOnLine(previous, previous.matching_bracket) and
413          _IsFunctionCallWithArguments(current)):
414        # There is a function call, with more than 1 argument, where the first
415        # argument is itself a function call with arguments.  In this specific
416        # case, if we split after the first argument's opening '(', then the
417        # formatting will look bad for the rest of the arguments. E.g.:
418        #
419        #     outer_function_call(inner_function_call(
420        #         inner_arg1, inner_arg2),
421        #                         outer_arg1, outer_arg2)
422        #
423        # Instead, enforce a split before that argument to keep things looking
424        # good.
425        return True
426
427    if (previous.OpensScope() and not current.OpensScope() and
428        not current.is_comment and
429        format_token.Subtype.SUBSCRIPT_BRACKET not in previous.subtypes):
430      if pprevious and not pprevious.is_keyword and not pprevious.is_name:
431        # We want to split if there's a comment in the container.
432        token = current
433        while token != previous.matching_bracket:
434          if token.is_comment:
435            return True
436          token = token.next_token
437      if previous.value == '(':
438        pptoken = previous.previous_token
439        if not pptoken or not pptoken.is_name:
440          # Split after the opening of a tuple if it doesn't fit on the current
441          # line and it's not a function call.
442          if self._FitsOnLine(previous, previous.matching_bracket):
443            return False
444        elif not self._FitsOnLine(previous, previous.matching_bracket):
445          if len(previous.container_elements) == 1:
446            return False
447
448          elements = previous.container_elements + [previous.matching_bracket]
449          i = 1
450          while i < len(elements):
451            if (not elements[i - 1].OpensScope() and
452                not self._FitsOnLine(elements[i - 1], elements[i])):
453              return True
454            i += 1
455
456          if (self.column_limit - self.column) / float(self.column_limit) < 0.3:
457            # Try not to squish all of the arguments off to the right.
458            return True
459      else:
460        # Split after the opening of a container if it doesn't fit on the
461        # current line.
462        if not self._FitsOnLine(previous, previous.matching_bracket):
463          return True
464
465    ###########################################################################
466    # Original Formatting Splitting
467    # These checks rely upon the original formatting. This is in order to
468    # attempt to keep hand-written code in the same condition as it was before.
469    # However, this may cause the formatter to fail to be idempotent.
470    if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and
471        previous.lineno < current.lineno):
472      # Retain the split before a bitwise operator.
473      return True
474
475    if (current.is_comment and
476        previous.lineno < current.lineno - current.value.count('\n')):
477      # If a comment comes in the middle of an unwrapped line (like an if
478      # conditional with comments interspersed), then we want to split if the
479      # original comments were on a separate line.
480      return True
481
482    return False
483
484  def AddTokenToState(self, newline, dry_run, must_split=False):
485    """Add a token to the format decision state.
486
487    Allow the heuristic to try out adding the token with and without a newline.
488    Later on, the algorithm will determine which one has the lowest penalty.
489
490    Arguments:
491      newline: (bool) Add the token on a new line if True.
492      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
493        True.
494      must_split: (bool) A newline was required before this token.
495
496    Returns:
497      The penalty of splitting after the current token.
498    """
499    penalty = 0
500    if newline:
501      penalty = self._AddTokenOnNewline(dry_run, must_split)
502    else:
503      self._AddTokenOnCurrentLine(dry_run)
504
505    penalty += self._CalculateComprehensionState(newline)
506
507    return self.MoveStateToNextToken() + penalty
508
509  def _AddTokenOnCurrentLine(self, dry_run):
510    """Puts the token on the current line.
511
512    Appends the next token to the state and updates information necessary for
513    indentation.
514
515    Arguments:
516      dry_run: (bool) Commit whitespace changes to the FormatToken if True.
517    """
518    current = self.next_token
519    previous = current.previous_token
520
521    spaces = current.spaces_required_before
522    if not dry_run:
523      current.AddWhitespacePrefix(newlines_before=0, spaces=spaces)
524
525    if previous.OpensScope():
526      if not current.is_comment:
527        # Align closing scopes that are on a newline with the opening scope:
528        #
529        #     foo = [a,
530        #            b,
531        #           ]
532        self.stack[-1].closing_scope_indent = self.column - 1
533        if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'):
534          self.stack[-1].closing_scope_indent += 1
535        self.stack[-1].indent = self.column + spaces
536      else:
537        self.stack[-1].closing_scope_indent = (
538            self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))
539
540    self.column += spaces
541
542  def _AddTokenOnNewline(self, dry_run, must_split):
543    """Adds a line break and necessary indentation.
544
545    Appends the next token to the state and updates information necessary for
546    indentation.
547
548    Arguments:
549      dry_run: (bool) Don't commit whitespace changes to the FormatToken if
550        True.
551      must_split: (bool) A newline was required before this token.
552
553    Returns:
554      The split penalty for splitting after the current state.
555    """
556    current = self.next_token
557    previous = current.previous_token
558
559    self.column = self._GetNewlineColumn()
560
561    if not dry_run:
562      indent_level = self.line.depth
563      spaces = self.column
564      if spaces:
565        spaces -= indent_level * style.Get('INDENT_WIDTH')
566      current.AddWhitespacePrefix(
567          newlines_before=1, spaces=spaces, indent_level=indent_level)
568
569    if not current.is_comment:
570      self.stack[-1].last_space = self.column
571    self.lowest_level_on_line = self.paren_level
572
573    if (previous.OpensScope() or
574        (previous.is_comment and previous.previous_token is not None and
575         previous.previous_token.OpensScope())):
576      self.stack[-1].closing_scope_indent = max(
577          0, self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))
578
579      self.stack[-1].split_before_closing_bracket = True
580
581    # Calculate the split penalty.
582    penalty = current.split_penalty
583
584    if must_split:
585      # Don't penalize for a must split.
586      return penalty
587
588    if previous.is_pseudo_paren and previous.value == '(':
589      # Small penalty for splitting after a pseudo paren.
590      penalty += 50
591
592    # Add a penalty for each increasing newline we add, but don't penalize for
593    # splitting before an if-expression or list comprehension.
594    if current.value not in {'if', 'for'}:
595      last = self.stack[-1]
596      last.num_line_splits += 1
597      penalty += (
598          style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') *
599          last.num_line_splits)
600
601    if current.OpensScope() and previous.OpensScope():
602      # Prefer to keep opening brackets coalesced (unless it's at the beginning
603      # of a function call).
604      pprev = previous.previous_token
605      if not pprev or not pprev.is_name:
606        penalty += 10
607
608    return penalty + 10
609
610  def MoveStateToNextToken(self):
611    """Calculate format decision state information and move onto the next token.
612
613    Before moving onto the next token, we first calculate the format decision
614    state given the current token and its formatting decisions. Then the format
615    decision state is set up so that the next token can be added.
616
617    Returns:
618      The penalty for the number of characters over the column limit.
619    """
620    current = self.next_token
621    if not current.OpensScope() and not current.ClosesScope():
622      self.lowest_level_on_line = min(self.lowest_level_on_line,
623                                      self.paren_level)
624
625    # If we encounter an opening bracket, we add a level to our stack to prepare
626    # for the subsequent tokens.
627    if current.OpensScope():
628      last = self.stack[-1]
629      new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space
630
631      self.stack.append(_ParenState(new_indent, self.stack[-1].last_space))
632      self.paren_level += 1
633
634    # If we encounter a closing bracket, we can remove a level from our
635    # parenthesis stack.
636    if len(self.stack) > 1 and current.ClosesScope():
637      if format_token.Subtype.DICTIONARY_KEY_PART in current.subtypes:
638        self.stack[-2].last_space = self.stack[-2].indent
639      else:
640        self.stack[-2].last_space = self.stack[-1].last_space
641      self.stack.pop()
642      self.paren_level -= 1
643
644    is_multiline_string = current.is_string and '\n' in current.value
645    if is_multiline_string:
646      # This is a multiline string. Only look at the first line.
647      self.column += len(current.value.split('\n')[0])
648    elif not current.is_pseudo_paren:
649      self.column += len(current.value)
650
651    self.next_token = self.next_token.next_token
652
653    # Calculate the penalty for overflowing the column limit.
654    penalty = 0
655    if (not current.is_pylint_comment and not current.is_pytype_comment and
656        self.column > self.column_limit):
657      excess_characters = self.column - self.column_limit
658      penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters
659
660    if is_multiline_string:
661      # If this is a multiline string, the column is actually the
662      # end of the last line in the string.
663      self.column = len(current.value.split('\n')[-1])
664
665    return penalty
666
667  def _CalculateComprehensionState(self, newline):
668    """Makes required changes to comprehension state.
669
670    Args:
671      newline: Whether the current token is to be added on a newline.
672
673    Returns:
674      The penalty for the token-newline combination given the current
675      comprehension state.
676    """
677    current = self.next_token
678    previous = current.previous_token
679    top_of_stack = self.comp_stack[-1] if self.comp_stack else None
680    penalty = 0
681
682    if top_of_stack is not None:
683      # Check if the token terminates the current comprehension.
684      if current == top_of_stack.closing_bracket:
685        last = self.comp_stack.pop()
686        # Lightly penalize comprehensions that are split across multiple lines.
687        if last.has_interior_split:
688          penalty += style.Get('SPLIT_PENALTY_COMPREHENSION')
689
690        return penalty
691
692      if newline:
693        top_of_stack.has_interior_split = True
694
695    if (format_token.Subtype.COMP_EXPR in current.subtypes and
696        format_token.Subtype.COMP_EXPR not in previous.subtypes):
697      self.comp_stack.append(object_state.ComprehensionState(current))
698      return penalty
699
700    if (current.value == 'for' and
701        format_token.Subtype.COMP_FOR in current.subtypes):
702      if top_of_stack.for_token is not None:
703        # Treat nested comprehensions like normal comp_if expressions.
704        # Example:
705        #     my_comp = [
706        #         a.qux + b.qux
707        #         for a in foo
708        #   -->   for b in bar   <--
709        #         if a.zut + b.zut
710        #     ]
711        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
712            top_of_stack.has_split_at_for != newline and
713            (top_of_stack.has_split_at_for or
714             not top_of_stack.HasTrivialExpr())):
715          penalty += split_penalty.UNBREAKABLE
716      else:
717        top_of_stack.for_token = current
718        top_of_stack.has_split_at_for = newline
719
720        # Try to keep trivial expressions on the same line as the comp_for.
721        if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and
722            top_of_stack.HasTrivialExpr()):
723          penalty += split_penalty.CONNECTED
724
725    if (format_token.Subtype.COMP_IF in current.subtypes and
726        format_token.Subtype.COMP_IF not in previous.subtypes):
727      # Penalize breaking at comp_if when it doesn't match the newline structure
728      # in the rest of the comprehension.
729      if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
730          top_of_stack.has_split_at_for != newline and
731          (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())):
732        penalty += split_penalty.UNBREAKABLE
733
734    return penalty
735
736  def _GetNewlineColumn(self):
737    """Return the new column on the newline."""
738    current = self.next_token
739    previous = current.previous_token
740    top_of_stack = self.stack[-1]
741
742    if current.spaces_required_before > 2 or self.line.disable:
743      return current.spaces_required_before
744
745    if current.OpensScope():
746      return top_of_stack.indent if self.paren_level else self.first_indent
747
748    if current.ClosesScope():
749      if (previous.OpensScope() or
750          (previous.is_comment and previous.previous_token is not None and
751           previous.previous_token.OpensScope())):
752        return max(0,
753                   top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH'))
754      return top_of_stack.closing_scope_indent
755
756    if (previous and previous.is_string and current.is_string and
757        format_token.Subtype.DICTIONARY_VALUE in current.subtypes):
758      return previous.column
759
760    if style.Get('INDENT_DICTIONARY_VALUE'):
761      if previous and (previous.value == ':' or previous.is_pseudo_paren):
762        if format_token.Subtype.DICTIONARY_VALUE in current.subtypes:
763          return top_of_stack.indent
764
765    if (_IsCompoundStatement(self.line.first) and
766        (not style.Get('DEDENT_CLOSING_BRACKETS') or
767         style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))):
768      token_indent = (
769          len(self.line.first.whitespace_prefix.split('\n')[-1]) +
770          style.Get('INDENT_WIDTH'))
771      if token_indent == top_of_stack.indent:
772        return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH')
773
774    return top_of_stack.indent
775
776  def _FitsOnLine(self, start, end):
777    """Determines if line between start and end can fit on the current line."""
778    length = end.total_length - start.total_length
779    if not start.is_pseudo_paren:
780      length += len(start.value)
781    return length + self.column <= self.column_limit
782
783  def _EachDictEntryFitsOnOneLine(self, opening):
784    """Determine if each dict elems can fit on one line."""
785
786    def PreviousNonCommentToken(tok):
787      tok = tok.previous_token
788      while tok.is_comment:
789        tok = tok.previous_token
790      return tok
791
792    def ImplicitStringConcatenation(tok):
793      num_strings = 0
794      if tok.is_pseudo_paren:
795        tok = tok.next_token
796      while tok.is_string:
797        num_strings += 1
798        tok = tok.next_token
799      return num_strings > 1
800
801    closing = opening.matching_bracket
802    entry_start = opening.next_token
803    current = opening.next_token.next_token
804
805    while current and current != closing:
806      if format_token.Subtype.DICTIONARY_KEY in current.subtypes:
807        prev = PreviousNonCommentToken(current)
808        length = prev.total_length - entry_start.total_length
809        length += len(entry_start.value)
810        if length + self.stack[-2].indent >= self.column_limit:
811          return False
812        entry_start = current
813      if current.OpensScope():
814        if ((current.value == '{' or
815             (current.is_pseudo_paren and current.next_token.value == '{') and
816             format_token.Subtype.DICTIONARY_VALUE in current.subtypes) or
817            ImplicitStringConcatenation(current)):
818          # A dictionary entry that cannot fit on a single line shouldn't matter
819          # to this calculation. If it can't fit on a single line, then the
820          # opening should be on the same line as the key and the rest on
821          # newlines after it. But the other entries should be on single lines
822          # if possible.
823          if current.matching_bracket:
824            current = current.matching_bracket
825          while current:
826            if current == closing:
827              return True
828            if format_token.Subtype.DICTIONARY_KEY in current.subtypes:
829              entry_start = current
830              break
831            current = current.next_token
832        else:
833          current = current.matching_bracket
834      else:
835        current = current.next_token
836
837    # At this point, current is the closing bracket. Go back one to get the the
838    # end of the dictionary entry.
839    current = PreviousNonCommentToken(current)
840    length = current.total_length - entry_start.total_length
841    length += len(entry_start.value)
842    return length + self.stack[-2].indent <= self.column_limit
843
844  def _ArgumentListHasDictionaryEntry(self, token):
845    """Check if the function argument list has a dictionary as an arg."""
846    if _IsArgumentToFunction(token):
847      while token:
848        if token.value == '{':
849          length = token.matching_bracket.total_length - token.total_length
850          return length + self.stack[-2].indent > self.column_limit
851        if token.ClosesScope():
852          break
853        if token.OpensScope():
854          token = token.matching_bracket
855        token = token.next_token
856    return False
857
858
859_COMPOUND_STMTS = frozenset(
860    {'for', 'while', 'if', 'elif', 'with', 'except', 'def', 'class'})
861
862
863def _IsCompoundStatement(token):
864  if token.value == 'async':
865    token = token.next_token
866  return token.value in _COMPOUND_STMTS
867
868
869def _IsFunctionDef(token):
870  if token.value == 'async':
871    token = token.next_token
872  return token.value == 'def'
873
874
875def _IsFunctionCallWithArguments(token):
876  while token:
877    if token.value == '(':
878      token = token.next_token
879      return token and token.value != ')'
880    elif token.name not in {'NAME', 'DOT', 'EQUAL'}:
881      break
882    token = token.next_token
883  return False
884
885
886def _IsArgumentToFunction(token):
887  bracket = unwrapped_line.IsSurroundedByBrackets(token)
888  if not bracket or bracket.value != '(':
889    return False
890  previous = bracket.previous_token
891  return previous and previous.is_name
892
893
894def _GetLengthOfSubtype(token, subtype, exclude=None):
895  current = token
896  while (current.next_token and subtype in current.subtypes and
897         (exclude is None or exclude not in current.subtypes)):
898    current = current.next_token
899  return current.total_length - token.total_length + 1
900
901
902def _GetOpeningBracket(current):
903  """Get the opening bracket containing the current token."""
904  if current.matching_bracket and not current.is_pseudo_paren:
905    return current.matching_bracket
906  while current:
907    if current.ClosesScope():
908      current = current.matching_bracket
909    elif current.is_pseudo_paren:
910      current = current.previous_token
911    elif current.OpensScope():
912      return current
913    current = current.previous_token
914  return None
915
916
917def _LastTokenInLine(current):
918  while not current.is_comment and current.next_token:
919    current = current.next_token
920  return current
921
922
923def _IsFunctionDefinition(current):
924  prev = current.previous_token
925  return (current.value == '(' and prev and
926          format_token.Subtype.FUNC_DEF in prev.subtypes)
927
928
929def _IsLastScopeInLine(current):
930  while current:
931    current = current.next_token
932    if current and current.OpensScope():
933      return False
934  return True
935
936
937def _IsSingleElementTuple(token):
938  """Check if it's a single-element tuple."""
939  close = token.matching_bracket
940  token = token.next_token
941  num_commas = 0
942  while token != close:
943    if token.value == ',':
944      num_commas += 1
945    if token.OpensScope():
946      token = token.matching_bracket
947    else:
948      token = token.next_token
949  return num_commas == 1
950
951
952def _ScopeHasNoCommas(token):
953  """Check if the scope has no commas."""
954  close = token.matching_bracket
955  token = token.next_token
956  while token != close:
957    if token.value == ',':
958      return False
959    if token.OpensScope():
960      token = token.matching_bracket
961    else:
962      token = token.next_token
963  return True
964
965
966class _ParenState(object):
967  """Maintains the state of the bracket enclosures.
968
969  A stack of _ParenState objects are kept so that we know how to indent relative
970  to the brackets.
971
972  Attributes:
973    indent: The column position to which a specified parenthesis level needs to
974      be indented.
975    last_space: The column position of the last space on each level.
976    split_before_closing_bracket: Whether a newline needs to be inserted before
977      the closing bracket. We only want to insert a newline before the closing
978      bracket if there also was a newline after the beginning left bracket.
979    num_line_splits: Number of line splits this _ParenState contains already.
980      Each subsequent line split gets an increasing penalty.
981  """
982
983  # TODO(morbo): This doesn't track "bin packing."
984
985  def __init__(self, indent, last_space):
986    self.indent = indent
987    self.last_space = last_space
988    self.closing_scope_indent = 0
989    self.split_before_closing_bracket = False
990    self.num_line_splits = 0
991
992  def Clone(self):
993    state = _ParenState(self.indent, self.last_space)
994    state.closing_scope_indent = self.closing_scope_indent
995    state.split_before_closing_bracket = self.split_before_closing_bracket
996    state.num_line_splits = self.num_line_splits
997    return state
998
999  def __repr__(self):
1000    return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % (
1001        self.indent, self.last_space, self.closing_scope_indent)
1002
1003  def __eq__(self, other):
1004    return hash(self) == hash(other)
1005
1006  def __ne__(self, other):
1007    return not self == other
1008
1009  def __hash__(self, *args, **kwargs):
1010    return hash((self.indent, self.last_space, self.closing_scope_indent,
1011                 self.split_before_closing_bracket, self.num_line_splits))
1012