1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 
24 namespace clang {
25 class SourceManager;
26 
27 namespace format {
28 
29 class AnnotatedLine;
30 struct FormatToken;
31 struct LineState;
32 struct ParenState;
33 class WhitespaceManager;
34 
35 class ContinuationIndenter {
36 public:
37   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
38   /// column \p FirstIndent.
39   ContinuationIndenter(const FormatStyle &Style,
40                        const AdditionalKeywords &Keywords,
41                        const SourceManager &SourceMgr,
42                        WhitespaceManager &Whitespaces,
43                        encoding::Encoding Encoding,
44                        bool BinPackInconclusiveFunctions);
45 
46   /// \brief Get the initial state, i.e. the state after placing \p Line's
47   /// first token at \p FirstIndent.
48   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
49                             bool DryRun);
50 
51   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
52   // better home.
53   /// \brief Returns \c true, if a line break after \p State is allowed.
54   bool canBreak(const LineState &State);
55 
56   /// \brief Returns \c true, if a line break after \p State is mandatory.
57   bool mustBreak(const LineState &State);
58 
59   /// \brief Appends the next token to \p State and updates information
60   /// necessary for indentation.
61   ///
62   /// Puts the token on the current line if \p Newline is \c false and adds a
63   /// line break and necessary indentation otherwise.
64   ///
65   /// If \p DryRun is \c false, also creates and stores the required
66   /// \c Replacement.
67   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
68                            unsigned ExtraSpaces = 0);
69 
70   /// \brief Get the column limit for this line. This is the style's column
71   /// limit, potentially reduced for preprocessor definitions.
72   unsigned getColumnLimit(const LineState &State) const;
73 
74 private:
75   /// \brief Mark the next token as consumed in \p State and modify its stacks
76   /// accordingly.
77   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
78 
79   /// \brief Update 'State' according to the next token's fake left parentheses.
80   void moveStatePastFakeLParens(LineState &State, bool Newline);
81   /// \brief Update 'State' according to the next token's fake r_parens.
82   void moveStatePastFakeRParens(LineState &State);
83 
84   /// \brief Update 'State' according to the next token being one of "(<{[".
85   void moveStatePastScopeOpener(LineState &State, bool Newline);
86   /// \brief Update 'State' according to the next token being one of ")>}]".
87   void moveStatePastScopeCloser(LineState &State);
88   /// \brief Update 'State' with the next token opening a nested block.
89   void moveStateToNewBlock(LineState &State);
90 
91   /// \brief If the current token sticks out over the end of the line, break
92   /// it if possible.
93   ///
94   /// \returns An extra penalty if a token was broken, otherwise 0.
95   ///
96   /// The returned penalty will cover the cost of the additional line breaks and
97   /// column limit violation in all lines except for the last one. The penalty
98   /// for the column limit violation in the last line (and in single line
99   /// tokens) is handled in \c addNextStateToQueue.
100   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
101                                 bool DryRun);
102 
103   /// \brief Appends the next token to \p State and updates information
104   /// necessary for indentation.
105   ///
106   /// Puts the token on the current line.
107   ///
108   /// If \p DryRun is \c false, also creates and stores the required
109   /// \c Replacement.
110   void addTokenOnCurrentLine(LineState &State, bool DryRun,
111                              unsigned ExtraSpaces);
112 
113   /// \brief Appends the next token to \p State and updates information
114   /// necessary for indentation.
115   ///
116   /// Adds a line break and necessary indentation.
117   ///
118   /// If \p DryRun is \c false, also creates and stores the required
119   /// \c Replacement.
120   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
121 
122   /// \brief Calculate the new column for a line wrap before the next token.
123   unsigned getNewLineColumn(const LineState &State);
124 
125   /// \brief Adds a multiline token to the \p State.
126   ///
127   /// \returns Extra penalty for the first line of the literal: last line is
128   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
129   /// matter, as we don't change them.
130   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
131 
132   /// \brief Returns \c true if the next token starts a multiline string
133   /// literal.
134   ///
135   /// This includes implicitly concatenated strings, strings that will be broken
136   /// by clang-format and string literals with escaped newlines.
137   bool nextIsMultilineString(const LineState &State);
138 
139   FormatStyle Style;
140   const AdditionalKeywords &Keywords;
141   const SourceManager &SourceMgr;
142   WhitespaceManager &Whitespaces;
143   encoding::Encoding Encoding;
144   bool BinPackInconclusiveFunctions;
145   llvm::Regex CommentPragmasRegex;
146 };
147 
/// \brief Formatting properties that apply to one parenthesis level.
struct ParenState {
  /// \brief Creates the state for a parenthesis level.
  ///
  /// \c NestedBlockIndent starts out equal to \p Indent. \c LastOperatorWrapped
  /// and \c AlignColons start as \c true; every other flag that is not passed
  /// in starts as \c false. The remaining column fields start at 0 through
  /// their in-class initializers.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// \brief The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess = 0;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn = 0;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos = 0;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall = 0;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts = 0;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation = 0;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation = 0;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos = 0;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace : 1;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking : 1;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter : 1;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak : 1;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped : 1;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak : 1;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder : 1;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons : 1;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound : 1;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks : 1;

  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined : 1;

  /// \brief Orders two \c ParenState objects field by field.
  ///
  /// The first field that differs decides the result. Unsigned fields order by
  /// value; for a flag, the state in which the flag is set orders first.
  /// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks
  /// are skipped on purpose (see their documentation).
  ///
  /// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation are
  /// not compared either; nothing here says whether that is intentional.
  /// Confirm before relying on it.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (NestedBlockIndent != Other.NestedBlockIndent)
      return NestedBlockIndent < Other.NestedBlockIndent;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder;
    if (NestedBlockInlined != Other.NestedBlockInlined)
      return NestedBlockInlined;
    return false;
  }
};
306 
307 /// \brief The current state when indenting a unwrapped line.
308 ///
309 /// As the indenting tries different combinations this is copied by value.
310 struct LineState {
311   /// \brief The number of used columns in the current line.
312   unsigned Column;
313 
314   /// \brief The token that needs to be next formatted.
315   FormatToken *NextToken;
316 
317   /// \brief \c true if this line contains a continued for-loop section.
318   bool LineContainsContinuedForLoopSection;
319 
320   /// \brief The \c NestingLevel at the start of this line.
321   unsigned StartOfLineLevel;
322 
323   /// \brief The lowest \c NestingLevel on the current line.
324   unsigned LowestLevelOnLine;
325 
326   /// \brief The start column of the string literal, if we're in a string
327   /// literal sequence, 0 otherwise.
328   unsigned StartOfStringLiteral;
329 
330   /// \brief A stack keeping track of properties applying to parenthesis
331   /// levels.
332   std::vector<ParenState> Stack;
333 
334   /// \brief Ignore the stack of \c ParenStates for state comparison.
335   ///
336   /// In long and deeply nested unwrapped lines, the current algorithm can
337   /// be insufficient for finding the best formatting with a reasonable amount
338   /// of time and memory. Setting this flag will effectively lead to the
339   /// algorithm not analyzing some combinations. However, these combinations
340   /// rarely contain the optimal solution: In short, accepting a higher
341   /// penalty early would need to lead to different values in the \c
342   /// ParenState stack (in an otherwise identical state) and these different
343   /// values would need to lead to a significant amount of avoided penalty
344   /// later.
345   ///
346   /// FIXME: Come up with a better algorithm instead.
347   bool IgnoreStackForComparison;
348 
349   /// \brief The indent of the first token.
350   unsigned FirstIndent;
351 
352   /// \brief The line that is being formatted.
353   ///
354   /// Does not need to be considered for memoization because it doesn't change.
355   const AnnotatedLine *Line;
356 
357   /// \brief Comparison operator to be able to used \c LineState in \c map.
358   bool operator<(const LineState &Other) const {
359     if (NextToken != Other.NextToken)
360       return NextToken < Other.NextToken;
361     if (Column != Other.Column)
362       return Column < Other.Column;
363     if (LineContainsContinuedForLoopSection !=
364         Other.LineContainsContinuedForLoopSection)
365       return LineContainsContinuedForLoopSection;
366     if (StartOfLineLevel != Other.StartOfLineLevel)
367       return StartOfLineLevel < Other.StartOfLineLevel;
368     if (LowestLevelOnLine != Other.LowestLevelOnLine)
369       return LowestLevelOnLine < Other.LowestLevelOnLine;
370     if (StartOfStringLiteral != Other.StartOfStringLiteral)
371       return StartOfStringLiteral < Other.StartOfStringLiteral;
372     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
373       return false;
374     return Stack < Other.Stack;
375   }
376 };
377 
378 } // end namespace format
379 } // end namespace clang
380 
381 #endif
382