1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
~FormatTokenSource()28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
~ScopedDeclarationState()45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken,bool & StructuralError)60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken, bool &StructuralError)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         StructuralError(StructuralError),
65         PreviousStructuralError(StructuralError), Token(nullptr) {
66     TokenSource = this;
67     Line.Level = 0;
68     Line.InPPDirective = true;
69   }
70 
~ScopedMacroState()71   ~ScopedMacroState() override {
72     TokenSource = PreviousTokenSource;
73     ResetToken = Token;
74     Line.InPPDirective = false;
75     Line.Level = PreviousLineLevel;
76     StructuralError = PreviousStructuralError;
77   }
78 
getNextToken()79   FormatToken *getNextToken() override {
80     // The \c UnwrappedLineParser guards against this by never calling
81     // \c getNextToken() after it has encountered the first eof token.
82     assert(!eof());
83     Token = PreviousTokenSource->getNextToken();
84     if (eof())
85       return getFakeEOF();
86     return Token;
87   }
88 
getPosition()89   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
90 
setPosition(unsigned Position)91   FormatToken *setPosition(unsigned Position) override {
92     Token = PreviousTokenSource->setPosition(Position);
93     return Token;
94   }
95 
96 private:
eof()97   bool eof() { return Token && Token->HasUnescapedNewline; }
98 
getFakeEOF()99   FormatToken *getFakeEOF() {
100     static bool EOFInitialized = false;
101     static FormatToken FormatTok;
102     if (!EOFInitialized) {
103       FormatTok.Tok.startToken();
104       FormatTok.Tok.setKind(tok::eof);
105       EOFInitialized = true;
106     }
107     return &FormatTok;
108   }
109 
110   UnwrappedLine &Line;
111   FormatTokenSource *&TokenSource;
112   FormatToken *&ResetToken;
113   unsigned PreviousLineLevel;
114   FormatTokenSource *PreviousTokenSource;
115   bool &StructuralError;
116   bool PreviousStructuralError;
117 
118   FormatToken *Token;
119 };
120 
121 } // end anonymous namespace
122 
123 class ScopedLineState {
124 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)125   ScopedLineState(UnwrappedLineParser &Parser,
126                   bool SwitchToPreprocessorLines = false)
127       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
128     if (SwitchToPreprocessorLines)
129       Parser.CurrentLines = &Parser.PreprocessorDirectives;
130     else if (!Parser.Line->Tokens.empty())
131       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
132     PreBlockLine = std::move(Parser.Line);
133     Parser.Line = llvm::make_unique<UnwrappedLine>();
134     Parser.Line->Level = PreBlockLine->Level;
135     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
136   }
137 
~ScopedLineState()138   ~ScopedLineState() {
139     if (!Parser.Line->Tokens.empty()) {
140       Parser.addUnwrappedLine();
141     }
142     assert(Parser.Line->Tokens.empty());
143     Parser.Line = std::move(PreBlockLine);
144     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
145       Parser.MustBreakBeforeNextToken = true;
146     Parser.CurrentLines = OriginalLines;
147   }
148 
149 private:
150   UnwrappedLineParser &Parser;
151 
152   std::unique_ptr<UnwrappedLine> PreBlockLine;
153   SmallVectorImpl<UnwrappedLine> *OriginalLines;
154 };
155 
156 class CompoundStatementIndenter {
157 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)158   CompoundStatementIndenter(UnwrappedLineParser *Parser,
159                             const FormatStyle &Style, unsigned &LineLevel)
160       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
161     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
162       Parser->addUnwrappedLine();
163     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
164       Parser->addUnwrappedLine();
165       ++LineLevel;
166     }
167   }
~CompoundStatementIndenter()168   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
169 
170 private:
171   unsigned &LineLevel;
172   unsigned OldLineLevel;
173 };
174 
175 namespace {
176 
177 class IndexedTokenSource : public FormatTokenSource {
178 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)179   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
180       : Tokens(Tokens), Position(-1) {}
181 
getNextToken()182   FormatToken *getNextToken() override {
183     ++Position;
184     return Tokens[Position];
185   }
186 
getPosition()187   unsigned getPosition() override {
188     assert(Position >= 0);
189     return Position;
190   }
191 
setPosition(unsigned P)192   FormatToken *setPosition(unsigned P) override {
193     Position = P;
194     return Tokens[Position];
195   }
196 
reset()197   void reset() { Position = -1; }
198 
199 private:
200   ArrayRef<FormatToken *> Tokens;
201   int Position;
202 };
203 
204 } // end anonymous namespace
205 
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)206 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
207                                          const AdditionalKeywords &Keywords,
208                                          ArrayRef<FormatToken *> Tokens,
209                                          UnwrappedLineConsumer &Callback)
210     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
211       CurrentLines(&Lines), StructuralError(false), Style(Style),
212       Keywords(Keywords), Tokens(nullptr), Callback(Callback),
213       AllTokens(Tokens), PPBranchLevel(-1) {}
214 
reset()215 void UnwrappedLineParser::reset() {
216   PPBranchLevel = -1;
217   Line.reset(new UnwrappedLine);
218   CommentsBeforeNextToken.clear();
219   FormatTok = nullptr;
220   MustBreakBeforeNextToken = false;
221   PreprocessorDirectives.clear();
222   CurrentLines = &Lines;
223   DeclarationScopeStack.clear();
224   StructuralError = false;
225   PPStack.clear();
226 }
227 
parse()228 bool UnwrappedLineParser::parse() {
229   IndexedTokenSource TokenSource(AllTokens);
230   do {
231     DEBUG(llvm::dbgs() << "----\n");
232     reset();
233     Tokens = &TokenSource;
234     TokenSource.reset();
235 
236     readToken();
237     parseFile();
238     // Create line with eof token.
239     pushToken(FormatTok);
240     addUnwrappedLine();
241 
242     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
243                                                   E = Lines.end();
244          I != E; ++I) {
245       Callback.consumeUnwrappedLine(*I);
246     }
247     Callback.finishRun();
248     Lines.clear();
249     while (!PPLevelBranchIndex.empty() &&
250            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
252       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
253     }
254     if (!PPLevelBranchIndex.empty()) {
255       ++PPLevelBranchIndex.back();
256       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258     }
259   } while (!PPLevelBranchIndex.empty());
260 
261   return StructuralError;
262 }
263 
parseFile()264 void UnwrappedLineParser::parseFile() {
265   ScopedDeclarationState DeclarationState(
266       *Line, DeclarationScopeStack,
267       /*MustBeDeclaration=*/!Line->InPPDirective);
268   parseLevel(/*HasOpeningBrace=*/false);
269   // Make sure to format the remaining tokens.
270   flushComments(true);
271   addUnwrappedLine();
272 }
273 
parseLevel(bool HasOpeningBrace)274 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
275   bool SwitchLabelEncountered = false;
276   do {
277     switch (FormatTok->Tok.getKind()) {
278     case tok::comment:
279       nextToken();
280       addUnwrappedLine();
281       break;
282     case tok::l_brace:
283       // FIXME: Add parameter whether this can happen - if this happens, we must
284       // be in a non-declaration context.
285       parseBlock(/*MustBeDeclaration=*/false);
286       addUnwrappedLine();
287       break;
288     case tok::r_brace:
289       if (HasOpeningBrace)
290         return;
291       StructuralError = true;
292       nextToken();
293       addUnwrappedLine();
294       break;
295     case tok::kw_default:
296     case tok::kw_case:
297       if (!SwitchLabelEncountered &&
298           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
299         ++Line->Level;
300       SwitchLabelEncountered = true;
301       parseStructuralElement();
302       break;
303     default:
304       parseStructuralElement();
305       break;
306     }
307   } while (!eof());
308 }
309 
calculateBraceTypes()310 void UnwrappedLineParser::calculateBraceTypes() {
311   // We'll parse forward through the tokens until we hit
312   // a closing brace or eof - note that getNextToken() will
313   // parse macros, so this will magically work inside macro
314   // definitions, too.
315   unsigned StoredPosition = Tokens->getPosition();
316   FormatToken *Tok = FormatTok;
317   // Keep a stack of positions of lbrace tokens. We will
318   // update information about whether an lbrace starts a
319   // braced init list or a different block during the loop.
320   SmallVector<FormatToken *, 8> LBraceStack;
321   assert(Tok->Tok.is(tok::l_brace));
322   do {
323     // Get next none-comment token.
324     FormatToken *NextTok;
325     unsigned ReadTokens = 0;
326     do {
327       NextTok = Tokens->getNextToken();
328       ++ReadTokens;
329     } while (NextTok->is(tok::comment));
330 
331     switch (Tok->Tok.getKind()) {
332     case tok::l_brace:
333       LBraceStack.push_back(Tok);
334       break;
335     case tok::r_brace:
336       if (!LBraceStack.empty()) {
337         if (LBraceStack.back()->BlockKind == BK_Unknown) {
338           bool ProbablyBracedList = false;
339           if (Style.Language == FormatStyle::LK_Proto) {
340             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
341           } else {
342             // Using OriginalColumn to distinguish between ObjC methods and
343             // binary operators is a bit hacky.
344             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
345                                     NextTok->OriginalColumn == 0;
346 
347             // If there is a comma, semicolon or right paren after the closing
348             // brace, we assume this is a braced initializer list.  Note that
349             // regardless how we mark inner braces here, we will overwrite the
350             // BlockKind later if we parse a braced list (where all blocks
351             // inside are by default braced lists), or when we explicitly detect
352             // blocks (for example while parsing lambdas).
353             //
354             // We exclude + and - as they can be ObjC visibility modifiers.
355             ProbablyBracedList =
356                 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon,
357                                  tok::r_paren, tok::r_square, tok::l_brace,
358                                  tok::l_paren, tok::ellipsis) ||
359                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
360           }
361           if (ProbablyBracedList) {
362             Tok->BlockKind = BK_BracedInit;
363             LBraceStack.back()->BlockKind = BK_BracedInit;
364           } else {
365             Tok->BlockKind = BK_Block;
366             LBraceStack.back()->BlockKind = BK_Block;
367           }
368         }
369         LBraceStack.pop_back();
370       }
371       break;
372     case tok::at:
373     case tok::semi:
374     case tok::kw_if:
375     case tok::kw_while:
376     case tok::kw_for:
377     case tok::kw_switch:
378     case tok::kw_try:
379     case tok::kw___try:
380       if (!LBraceStack.empty())
381         LBraceStack.back()->BlockKind = BK_Block;
382       break;
383     default:
384       break;
385     }
386     Tok = NextTok;
387   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
388   // Assume other blocks for all unclosed opening braces.
389   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
390     if (LBraceStack[i]->BlockKind == BK_Unknown)
391       LBraceStack[i]->BlockKind = BK_Block;
392   }
393 
394   FormatTok = Tokens->setPosition(StoredPosition);
395 }
396 
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)397 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
398                                      bool MunchSemi) {
399   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
400   unsigned InitialLevel = Line->Level;
401   nextToken();
402 
403   addUnwrappedLine();
404 
405   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
406                                           MustBeDeclaration);
407   if (AddLevel)
408     ++Line->Level;
409   parseLevel(/*HasOpeningBrace=*/true);
410 
411   if (!FormatTok->Tok.is(tok::r_brace)) {
412     Line->Level = InitialLevel;
413     StructuralError = true;
414     return;
415   }
416 
417   nextToken(); // Munch the closing brace.
418   if (MunchSemi && FormatTok->Tok.is(tok::semi))
419     nextToken();
420   Line->Level = InitialLevel;
421 }
422 
isGoogScope(const UnwrappedLine & Line)423 static bool isGoogScope(const UnwrappedLine &Line) {
424   // FIXME: Closure-library specific stuff should not be hard-coded but be
425   // configurable.
426   if (Line.Tokens.size() < 4)
427     return false;
428   auto I = Line.Tokens.begin();
429   if (I->Tok->TokenText != "goog")
430     return false;
431   ++I;
432   if (I->Tok->isNot(tok::period))
433     return false;
434   ++I;
435   if (I->Tok->TokenText != "scope")
436     return false;
437   ++I;
438   return I->Tok->is(tok::l_paren);
439 }
440 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)441 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
442                                    const FormatToken &InitialToken) {
443   switch (Style.BreakBeforeBraces) {
444   case FormatStyle::BS_Linux:
445     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
446   case FormatStyle::BS_Allman:
447   case FormatStyle::BS_GNU:
448     return true;
449   default:
450     return false;
451   }
452 }
453 
parseChildBlock()454 void UnwrappedLineParser::parseChildBlock() {
455   FormatTok->BlockKind = BK_Block;
456   nextToken();
457   {
458     bool GoogScope =
459         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
460     ScopedLineState LineState(*this);
461     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
462                                             /*MustBeDeclaration=*/false);
463     Line->Level += GoogScope ? 0 : 1;
464     parseLevel(/*HasOpeningBrace=*/true);
465     flushComments(isOnNewLine(*FormatTok));
466     Line->Level -= GoogScope ? 0 : 1;
467   }
468   nextToken();
469 }
470 
parsePPDirective()471 void UnwrappedLineParser::parsePPDirective() {
472   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
473   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
474   nextToken();
475 
476   if (!FormatTok->Tok.getIdentifierInfo()) {
477     parsePPUnknown();
478     return;
479   }
480 
481   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
482   case tok::pp_define:
483     parsePPDefine();
484     return;
485   case tok::pp_if:
486     parsePPIf(/*IfDef=*/false);
487     break;
488   case tok::pp_ifdef:
489   case tok::pp_ifndef:
490     parsePPIf(/*IfDef=*/true);
491     break;
492   case tok::pp_else:
493     parsePPElse();
494     break;
495   case tok::pp_elif:
496     parsePPElIf();
497     break;
498   case tok::pp_endif:
499     parsePPEndIf();
500     break;
501   default:
502     parsePPUnknown();
503     break;
504   }
505 }
506 
conditionalCompilationCondition(bool Unreachable)507 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
508   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
509     PPStack.push_back(PP_Unreachable);
510   else
511     PPStack.push_back(PP_Conditional);
512 }
513 
conditionalCompilationStart(bool Unreachable)514 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
515   ++PPBranchLevel;
516   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
517   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
518     PPLevelBranchIndex.push_back(0);
519     PPLevelBranchCount.push_back(0);
520   }
521   PPChainBranchIndex.push(0);
522   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
523   conditionalCompilationCondition(Unreachable || Skip);
524 }
525 
conditionalCompilationAlternative()526 void UnwrappedLineParser::conditionalCompilationAlternative() {
527   if (!PPStack.empty())
528     PPStack.pop_back();
529   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
530   if (!PPChainBranchIndex.empty())
531     ++PPChainBranchIndex.top();
532   conditionalCompilationCondition(
533       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
534       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
535 }
536 
conditionalCompilationEnd()537 void UnwrappedLineParser::conditionalCompilationEnd() {
538   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
539   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
540     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
541       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
542     }
543   }
544   // Guard against #endif's without #if.
545   if (PPBranchLevel > 0)
546     --PPBranchLevel;
547   if (!PPChainBranchIndex.empty())
548     PPChainBranchIndex.pop();
549   if (!PPStack.empty())
550     PPStack.pop_back();
551 }
552 
parsePPIf(bool IfDef)553 void UnwrappedLineParser::parsePPIf(bool IfDef) {
554   nextToken();
555   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
556                          FormatTok->Tok.getLiteralData() != nullptr &&
557                          StringRef(FormatTok->Tok.getLiteralData(),
558                                    FormatTok->Tok.getLength()) == "0") ||
559                         FormatTok->Tok.is(tok::kw_false);
560   conditionalCompilationStart(!IfDef && IsLiteralFalse);
561   parsePPUnknown();
562 }
563 
parsePPElse()564 void UnwrappedLineParser::parsePPElse() {
565   conditionalCompilationAlternative();
566   parsePPUnknown();
567 }
568 
parsePPElIf()569 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
570 
parsePPEndIf()571 void UnwrappedLineParser::parsePPEndIf() {
572   conditionalCompilationEnd();
573   parsePPUnknown();
574 }
575 
parsePPDefine()576 void UnwrappedLineParser::parsePPDefine() {
577   nextToken();
578 
579   if (FormatTok->Tok.getKind() != tok::identifier) {
580     parsePPUnknown();
581     return;
582   }
583   nextToken();
584   if (FormatTok->Tok.getKind() == tok::l_paren &&
585       FormatTok->WhitespaceRange.getBegin() ==
586           FormatTok->WhitespaceRange.getEnd()) {
587     parseParens();
588   }
589   addUnwrappedLine();
590   Line->Level = 1;
591 
592   // Errors during a preprocessor directive can only affect the layout of the
593   // preprocessor directive, and thus we ignore them. An alternative approach
594   // would be to use the same approach we use on the file level (no
595   // re-indentation if there was a structural error) within the macro
596   // definition.
597   parseFile();
598 }
599 
parsePPUnknown()600 void UnwrappedLineParser::parsePPUnknown() {
601   do {
602     nextToken();
603   } while (!eof());
604   addUnwrappedLine();
605 }
606 
607 // Here we blacklist certain tokens that are not usually the first token in an
608 // unwrapped line. This is used in attempt to distinguish macro calls without
609 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const clang::Token & Tok)610 static bool tokenCanStartNewLine(const clang::Token &Tok) {
611   // Semicolon can be a null-statement, l_square can be a start of a macro or
612   // a C++11 attribute, but this doesn't seem to be common.
613   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
614          Tok.isNot(tok::l_square) &&
615          // Tokens that can only be used as binary operators and a part of
616          // overloaded operator names.
617          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
618          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
619          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
620          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
621          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
622          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
623          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
624          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
625          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
626          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
627          Tok.isNot(tok::lesslessequal) &&
628          // Colon is used in labels, base class lists, initializer lists,
629          // range-based for loops, ternary operator, but should never be the
630          // first token in an unwrapped line.
631          Tok.isNot(tok::colon) &&
632          // 'noexcept' is a trailing annotation.
633          Tok.isNot(tok::kw_noexcept);
634 }
635 
parseStructuralElement()636 void UnwrappedLineParser::parseStructuralElement() {
637   assert(!FormatTok->Tok.is(tok::l_brace));
638   switch (FormatTok->Tok.getKind()) {
639   case tok::at:
640     nextToken();
641     if (FormatTok->Tok.is(tok::l_brace)) {
642       parseBracedList();
643       break;
644     }
645     switch (FormatTok->Tok.getObjCKeywordID()) {
646     case tok::objc_public:
647     case tok::objc_protected:
648     case tok::objc_package:
649     case tok::objc_private:
650       return parseAccessSpecifier();
651     case tok::objc_interface:
652     case tok::objc_implementation:
653       return parseObjCInterfaceOrImplementation();
654     case tok::objc_protocol:
655       return parseObjCProtocol();
656     case tok::objc_end:
657       return; // Handled by the caller.
658     case tok::objc_optional:
659     case tok::objc_required:
660       nextToken();
661       addUnwrappedLine();
662       return;
663     case tok::objc_try:
664       // This branch isn't strictly necessary (the kw_try case below would
665       // do this too after the tok::at is parsed above).  But be explicit.
666       parseTryCatch();
667       return;
668     default:
669       break;
670     }
671     break;
672   case tok::kw_asm:
673     nextToken();
674     if (FormatTok->is(tok::l_brace)) {
675       nextToken();
676       while (FormatTok && FormatTok->isNot(tok::eof)) {
677         if (FormatTok->is(tok::r_brace)) {
678           nextToken();
679           break;
680         }
681         FormatTok->Finalized = true;
682         nextToken();
683       }
684     }
685     break;
686   case tok::kw_namespace:
687     parseNamespace();
688     return;
689   case tok::kw_inline:
690     nextToken();
691     if (FormatTok->Tok.is(tok::kw_namespace)) {
692       parseNamespace();
693       return;
694     }
695     break;
696   case tok::kw_public:
697   case tok::kw_protected:
698   case tok::kw_private:
699     if (Style.Language == FormatStyle::LK_Java ||
700         Style.Language == FormatStyle::LK_JavaScript)
701       nextToken();
702     else
703       parseAccessSpecifier();
704     return;
705   case tok::kw_if:
706     parseIfThenElse();
707     return;
708   case tok::kw_for:
709   case tok::kw_while:
710     parseForOrWhileLoop();
711     return;
712   case tok::kw_do:
713     parseDoWhile();
714     return;
715   case tok::kw_switch:
716     parseSwitch();
717     return;
718   case tok::kw_default:
719     nextToken();
720     parseLabel();
721     return;
722   case tok::kw_case:
723     parseCaseLabel();
724     return;
725   case tok::kw_try:
726   case tok::kw___try:
727     parseTryCatch();
728     return;
729   case tok::kw_extern:
730     nextToken();
731     if (FormatTok->Tok.is(tok::string_literal)) {
732       nextToken();
733       if (FormatTok->Tok.is(tok::l_brace)) {
734         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
735         addUnwrappedLine();
736         return;
737       }
738     }
739     break;
740   case tok::kw_export:
741     if (Style.Language == FormatStyle::LK_JavaScript) {
742       parseJavaScriptEs6ImportExport();
743       return;
744     }
745     break;
746   case tok::identifier:
747     if (FormatTok->IsForEachMacro) {
748       parseForOrWhileLoop();
749       return;
750     }
751     if (Style.Language == FormatStyle::LK_JavaScript &&
752         FormatTok->is(Keywords.kw_import)) {
753       parseJavaScriptEs6ImportExport();
754       return;
755     }
756     if (FormatTok->is(Keywords.kw_signals)) {
757       parseAccessSpecifier();
758       return;
759     }
760     // In all other cases, parse the declaration.
761     break;
762   default:
763     break;
764   }
765   do {
766     switch (FormatTok->Tok.getKind()) {
767     case tok::at:
768       nextToken();
769       if (FormatTok->Tok.is(tok::l_brace))
770         parseBracedList();
771       break;
772     case tok::kw_enum:
773       parseEnum();
774       break;
775     case tok::kw_typedef:
776       nextToken();
777       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
778                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
779         parseEnum();
780       break;
781     case tok::kw_struct:
782     case tok::kw_union:
783     case tok::kw_class:
784       parseRecord();
785       // A record declaration or definition is always the start of a structural
786       // element.
787       break;
788     case tok::period:
789       nextToken();
790       // In Java, classes have an implicit static member "class".
791       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
792           FormatTok->is(tok::kw_class))
793         nextToken();
794       break;
795     case tok::semi:
796       nextToken();
797       addUnwrappedLine();
798       return;
799     case tok::r_brace:
800       addUnwrappedLine();
801       return;
802     case tok::l_paren:
803       parseParens();
804       break;
805     case tok::caret:
806       nextToken();
807       if (FormatTok->Tok.isAnyIdentifier() ||
808           FormatTok->isSimpleTypeSpecifier())
809         nextToken();
810       if (FormatTok->is(tok::l_paren))
811         parseParens();
812       if (FormatTok->is(tok::l_brace))
813         parseChildBlock();
814       break;
815     case tok::l_brace:
816       if (!tryToParseBracedList()) {
817         // A block outside of parentheses must be the last part of a
818         // structural element.
819         // FIXME: Figure out cases where this is not true, and add projections
820         // for them (the one we know is missing are lambdas).
821         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
822           addUnwrappedLine();
823         FormatTok->Type = TT_FunctionLBrace;
824         parseBlock(/*MustBeDeclaration=*/false);
825         addUnwrappedLine();
826         return;
827       }
828       // Otherwise this was a braced init list, and the structural
829       // element continues.
830       break;
831     case tok::kw_try:
832       // We arrive here when parsing function-try blocks.
833       parseTryCatch();
834       return;
835     case tok::identifier: {
836       StringRef Text = FormatTok->TokenText;
837       // Parse function literal unless 'function' is the first token in a line
838       // in which case this should be treated as a free-standing function.
839       if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" &&
840           Line->Tokens.size() > 0) {
841         tryToParseJSFunction();
842         break;
843       }
844       nextToken();
845       if (Line->Tokens.size() == 1 &&
846           // JS doesn't have macros, and within classes colons indicate fields,
847           // not labels.
848           Style.Language != FormatStyle::LK_JavaScript) {
849         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
850           parseLabel();
851           return;
852         }
853         // Recognize function-like macro usages without trailing semicolon as
854         // well as free-standing macros like Q_OBJECT.
855         bool FunctionLike = FormatTok->is(tok::l_paren);
856         if (FunctionLike)
857           parseParens();
858         if (FormatTok->NewlinesBefore > 0 &&
859             (Text.size() >= 5 || FunctionLike) &&
860             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
861           addUnwrappedLine();
862           return;
863         }
864       }
865       break;
866     }
867     case tok::equal:
868       nextToken();
869       if (FormatTok->Tok.is(tok::l_brace)) {
870         parseBracedList();
871       }
872       break;
873     case tok::l_square:
874       parseSquare();
875       break;
876     case tok::kw_new:
877       parseNew();
878       break;
879     default:
880       nextToken();
881       break;
882     }
883   } while (!eof());
884 }
885 
tryToParseLambda()886 bool UnwrappedLineParser::tryToParseLambda() {
887   // FIXME: This is a dirty way to access the previous token. Find a better
888   // solution.
889   if (!Line->Tokens.empty() &&
890       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
891                                         tok::kw_new, tok::kw_delete) ||
892        Line->Tokens.back().Tok->closesScope() ||
893        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
894     nextToken();
895     return false;
896   }
897   assert(FormatTok->is(tok::l_square));
898   FormatToken &LSquare = *FormatTok;
899   if (!tryToParseLambdaIntroducer())
900     return false;
901 
902   while (FormatTok->isNot(tok::l_brace)) {
903     if (FormatTok->isSimpleTypeSpecifier()) {
904       nextToken();
905       continue;
906     }
907     switch (FormatTok->Tok.getKind()) {
908     case tok::l_brace:
909       break;
910     case tok::l_paren:
911       parseParens();
912       break;
913     case tok::amp:
914     case tok::star:
915     case tok::kw_const:
916     case tok::comma:
917     case tok::less:
918     case tok::greater:
919     case tok::identifier:
920     case tok::coloncolon:
921     case tok::kw_mutable:
922       nextToken();
923       break;
924     case tok::arrow:
925       FormatTok->Type = TT_TrailingReturnArrow;
926       nextToken();
927       break;
928     default:
929       return true;
930     }
931   }
932   LSquare.Type = TT_LambdaLSquare;
933   parseChildBlock();
934   return true;
935 }
936 
tryToParseLambdaIntroducer()937 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
938   nextToken();
939   if (FormatTok->is(tok::equal)) {
940     nextToken();
941     if (FormatTok->is(tok::r_square)) {
942       nextToken();
943       return true;
944     }
945     if (FormatTok->isNot(tok::comma))
946       return false;
947     nextToken();
948   } else if (FormatTok->is(tok::amp)) {
949     nextToken();
950     if (FormatTok->is(tok::r_square)) {
951       nextToken();
952       return true;
953     }
954     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
955       return false;
956     }
957     if (FormatTok->is(tok::comma))
958       nextToken();
959   } else if (FormatTok->is(tok::r_square)) {
960     nextToken();
961     return true;
962   }
963   do {
964     if (FormatTok->is(tok::amp))
965       nextToken();
966     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
967       return false;
968     nextToken();
969     if (FormatTok->is(tok::ellipsis))
970       nextToken();
971     if (FormatTok->is(tok::comma)) {
972       nextToken();
973     } else if (FormatTok->is(tok::r_square)) {
974       nextToken();
975       return true;
976     } else {
977       return false;
978     }
979   } while (!eof());
980   return false;
981 }
982 
tryToParseJSFunction()983 void UnwrappedLineParser::tryToParseJSFunction() {
984   nextToken();
985 
986   // Consume function name.
987   if (FormatTok->is(tok::identifier))
988     nextToken();
989 
990   if (FormatTok->isNot(tok::l_paren))
991     return;
992   nextToken();
993   while (FormatTok->isNot(tok::l_brace)) {
994     // Err on the side of caution in order to avoid consuming the full file in
995     // case of incomplete code.
996     if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
997                             tok::comment))
998       return;
999     nextToken();
1000   }
1001   parseChildBlock();
1002 }
1003 
tryToParseBracedList()1004 bool UnwrappedLineParser::tryToParseBracedList() {
1005   if (FormatTok->BlockKind == BK_Unknown)
1006     calculateBraceTypes();
1007   assert(FormatTok->BlockKind != BK_Unknown);
1008   if (FormatTok->BlockKind == BK_Block)
1009     return false;
1010   parseBracedList();
1011   return true;
1012 }
1013 
parseBracedList(bool ContinueOnSemicolons)1014 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1015   bool HasError = false;
1016   nextToken();
1017 
1018   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1019   // replace this by using parseAssigmentExpression() inside.
1020   do {
1021     if (Style.Language == FormatStyle::LK_JavaScript &&
1022         FormatTok->is(Keywords.kw_function)) {
1023       tryToParseJSFunction();
1024       continue;
1025     }
1026     switch (FormatTok->Tok.getKind()) {
1027     case tok::caret:
1028       nextToken();
1029       if (FormatTok->is(tok::l_brace)) {
1030         parseChildBlock();
1031       }
1032       break;
1033     case tok::l_square:
1034       tryToParseLambda();
1035       break;
1036     case tok::l_brace:
1037       // Assume there are no blocks inside a braced init list apart
1038       // from the ones we explicitly parse out (like lambdas).
1039       FormatTok->BlockKind = BK_BracedInit;
1040       parseBracedList();
1041       break;
1042     case tok::r_paren:
1043       // JavaScript can just have free standing methods and getters/setters in
1044       // object literals. Detect them by a "{" following ")".
1045       if (Style.Language == FormatStyle::LK_JavaScript) {
1046         nextToken();
1047         if (FormatTok->is(tok::l_brace))
1048           parseChildBlock();
1049         break;
1050       }
1051       nextToken();
1052       break;
1053     case tok::r_brace:
1054       nextToken();
1055       return !HasError;
1056     case tok::semi:
1057       HasError = true;
1058       if (!ContinueOnSemicolons)
1059         return !HasError;
1060       nextToken();
1061       break;
1062     case tok::comma:
1063       nextToken();
1064       break;
1065     default:
1066       nextToken();
1067       break;
1068     }
1069   } while (!eof());
1070   return false;
1071 }
1072 
parseParens()1073 void UnwrappedLineParser::parseParens() {
1074   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1075   nextToken();
1076   do {
1077     switch (FormatTok->Tok.getKind()) {
1078     case tok::l_paren:
1079       parseParens();
1080       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1081         parseChildBlock();
1082       break;
1083     case tok::r_paren:
1084       nextToken();
1085       return;
1086     case tok::r_brace:
1087       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1088       return;
1089     case tok::l_square:
1090       tryToParseLambda();
1091       break;
1092     case tok::l_brace:
1093       if (!tryToParseBracedList()) {
1094         parseChildBlock();
1095       }
1096       break;
1097     case tok::at:
1098       nextToken();
1099       if (FormatTok->Tok.is(tok::l_brace))
1100         parseBracedList();
1101       break;
1102     case tok::identifier:
1103       if (Style.Language == FormatStyle::LK_JavaScript &&
1104           FormatTok->is(Keywords.kw_function))
1105         tryToParseJSFunction();
1106       else
1107         nextToken();
1108       break;
1109     default:
1110       nextToken();
1111       break;
1112     }
1113   } while (!eof());
1114 }
1115 
parseSquare()1116 void UnwrappedLineParser::parseSquare() {
1117   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1118   if (tryToParseLambda())
1119     return;
1120   do {
1121     switch (FormatTok->Tok.getKind()) {
1122     case tok::l_paren:
1123       parseParens();
1124       break;
1125     case tok::r_square:
1126       nextToken();
1127       return;
1128     case tok::r_brace:
1129       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1130       return;
1131     case tok::l_square:
1132       parseSquare();
1133       break;
1134     case tok::l_brace: {
1135       if (!tryToParseBracedList()) {
1136         parseChildBlock();
1137       }
1138       break;
1139     }
1140     case tok::at:
1141       nextToken();
1142       if (FormatTok->Tok.is(tok::l_brace))
1143         parseBracedList();
1144       break;
1145     default:
1146       nextToken();
1147       break;
1148     }
1149   } while (!eof());
1150 }
1151 
parseIfThenElse()1152 void UnwrappedLineParser::parseIfThenElse() {
1153   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1154   nextToken();
1155   if (FormatTok->Tok.is(tok::l_paren))
1156     parseParens();
1157   bool NeedsUnwrappedLine = false;
1158   if (FormatTok->Tok.is(tok::l_brace)) {
1159     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1160     parseBlock(/*MustBeDeclaration=*/false);
1161     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1162         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1163       addUnwrappedLine();
1164     } else {
1165       NeedsUnwrappedLine = true;
1166     }
1167   } else {
1168     addUnwrappedLine();
1169     ++Line->Level;
1170     parseStructuralElement();
1171     --Line->Level;
1172   }
1173   if (FormatTok->Tok.is(tok::kw_else)) {
1174     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1175       addUnwrappedLine();
1176     nextToken();
1177     if (FormatTok->Tok.is(tok::l_brace)) {
1178       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1179       parseBlock(/*MustBeDeclaration=*/false);
1180       addUnwrappedLine();
1181     } else if (FormatTok->Tok.is(tok::kw_if)) {
1182       parseIfThenElse();
1183     } else {
1184       addUnwrappedLine();
1185       ++Line->Level;
1186       parseStructuralElement();
1187       --Line->Level;
1188     }
1189   } else if (NeedsUnwrappedLine) {
1190     addUnwrappedLine();
1191   }
1192 }
1193 
parseTryCatch()1194 void UnwrappedLineParser::parseTryCatch() {
1195   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1196   nextToken();
1197   bool NeedsUnwrappedLine = false;
1198   if (FormatTok->is(tok::colon)) {
1199     // We are in a function try block, what comes is an initializer list.
1200     nextToken();
1201     while (FormatTok->is(tok::identifier)) {
1202       nextToken();
1203       if (FormatTok->is(tok::l_paren))
1204         parseParens();
1205       else
1206         StructuralError = true;
1207       if (FormatTok->is(tok::comma))
1208         nextToken();
1209     }
1210   }
1211   // Parse try with resource.
1212   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1213     parseParens();
1214   }
1215   if (FormatTok->is(tok::l_brace)) {
1216     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1217     parseBlock(/*MustBeDeclaration=*/false);
1218     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1219         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1220         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1221       addUnwrappedLine();
1222     } else {
1223       NeedsUnwrappedLine = true;
1224     }
1225   } else if (!FormatTok->is(tok::kw_catch)) {
1226     // The C++ standard requires a compound-statement after a try.
1227     // If there's none, we try to assume there's a structuralElement
1228     // and try to continue.
1229     StructuralError = true;
1230     addUnwrappedLine();
1231     ++Line->Level;
1232     parseStructuralElement();
1233     --Line->Level;
1234   }
1235   while (1) {
1236     if (FormatTok->is(tok::at))
1237       nextToken();
1238     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1239                              tok::kw___finally) ||
1240           ((Style.Language == FormatStyle::LK_Java ||
1241             Style.Language == FormatStyle::LK_JavaScript) &&
1242            FormatTok->is(Keywords.kw_finally)) ||
1243           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1244            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1245       break;
1246     nextToken();
1247     while (FormatTok->isNot(tok::l_brace)) {
1248       if (FormatTok->is(tok::l_paren)) {
1249         parseParens();
1250         continue;
1251       }
1252       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1253         return;
1254       nextToken();
1255     }
1256     NeedsUnwrappedLine = false;
1257     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1258     parseBlock(/*MustBeDeclaration=*/false);
1259     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1260         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1261         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1262       addUnwrappedLine();
1263     } else {
1264       NeedsUnwrappedLine = true;
1265     }
1266   }
1267   if (NeedsUnwrappedLine) {
1268     addUnwrappedLine();
1269   }
1270 }
1271 
parseNamespace()1272 void UnwrappedLineParser::parseNamespace() {
1273   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1274 
1275   const FormatToken &InitialToken = *FormatTok;
1276   nextToken();
1277   if (FormatTok->Tok.is(tok::identifier))
1278     nextToken();
1279   if (FormatTok->Tok.is(tok::l_brace)) {
1280     if (ShouldBreakBeforeBrace(Style, InitialToken))
1281       addUnwrappedLine();
1282 
1283     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1284                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1285                      DeclarationScopeStack.size() > 1);
1286     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1287     // Munch the semicolon after a namespace. This is more common than one would
1288     // think. Puttin the semicolon into its own line is very ugly.
1289     if (FormatTok->Tok.is(tok::semi))
1290       nextToken();
1291     addUnwrappedLine();
1292   }
1293   // FIXME: Add error handling.
1294 }
1295 
parseNew()1296 void UnwrappedLineParser::parseNew() {
1297   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1298   nextToken();
1299   if (Style.Language != FormatStyle::LK_Java)
1300     return;
1301 
1302   // In Java, we can parse everything up to the parens, which aren't optional.
1303   do {
1304     // There should not be a ;, { or } before the new's open paren.
1305     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1306       return;
1307 
1308     // Consume the parens.
1309     if (FormatTok->is(tok::l_paren)) {
1310       parseParens();
1311 
1312       // If there is a class body of an anonymous class, consume that as child.
1313       if (FormatTok->is(tok::l_brace))
1314         parseChildBlock();
1315       return;
1316     }
1317     nextToken();
1318   } while (!eof());
1319 }
1320 
parseForOrWhileLoop()1321 void UnwrappedLineParser::parseForOrWhileLoop() {
1322   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) ||
1323           FormatTok->IsForEachMacro) &&
1324          "'for', 'while' or foreach macro expected");
1325   nextToken();
1326   if (FormatTok->Tok.is(tok::l_paren))
1327     parseParens();
1328   if (FormatTok->Tok.is(tok::l_brace)) {
1329     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1330     parseBlock(/*MustBeDeclaration=*/false);
1331     addUnwrappedLine();
1332   } else {
1333     addUnwrappedLine();
1334     ++Line->Level;
1335     parseStructuralElement();
1336     --Line->Level;
1337   }
1338 }
1339 
parseDoWhile()1340 void UnwrappedLineParser::parseDoWhile() {
1341   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1342   nextToken();
1343   if (FormatTok->Tok.is(tok::l_brace)) {
1344     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1345     parseBlock(/*MustBeDeclaration=*/false);
1346     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1347       addUnwrappedLine();
1348   } else {
1349     addUnwrappedLine();
1350     ++Line->Level;
1351     parseStructuralElement();
1352     --Line->Level;
1353   }
1354 
1355   // FIXME: Add error handling.
1356   if (!FormatTok->Tok.is(tok::kw_while)) {
1357     addUnwrappedLine();
1358     return;
1359   }
1360 
1361   nextToken();
1362   parseStructuralElement();
1363 }
1364 
parseLabel()1365 void UnwrappedLineParser::parseLabel() {
1366   nextToken();
1367   unsigned OldLineLevel = Line->Level;
1368   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1369     --Line->Level;
1370   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1371     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1372     parseBlock(/*MustBeDeclaration=*/false);
1373     if (FormatTok->Tok.is(tok::kw_break)) {
1374       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1375       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1376           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1377         addUnwrappedLine();
1378       }
1379       parseStructuralElement();
1380     }
1381     addUnwrappedLine();
1382   } else {
1383     addUnwrappedLine();
1384   }
1385   Line->Level = OldLineLevel;
1386 }
1387 
parseCaseLabel()1388 void UnwrappedLineParser::parseCaseLabel() {
1389   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1390   // FIXME: fix handling of complex expressions here.
1391   do {
1392     nextToken();
1393   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1394   parseLabel();
1395 }
1396 
parseSwitch()1397 void UnwrappedLineParser::parseSwitch() {
1398   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1399   nextToken();
1400   if (FormatTok->Tok.is(tok::l_paren))
1401     parseParens();
1402   if (FormatTok->Tok.is(tok::l_brace)) {
1403     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1404     parseBlock(/*MustBeDeclaration=*/false);
1405     addUnwrappedLine();
1406   } else {
1407     addUnwrappedLine();
1408     ++Line->Level;
1409     parseStructuralElement();
1410     --Line->Level;
1411   }
1412 }
1413 
parseAccessSpecifier()1414 void UnwrappedLineParser::parseAccessSpecifier() {
1415   nextToken();
1416   // Understand Qt's slots.
1417   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1418     nextToken();
1419   // Otherwise, we don't know what it is, and we'd better keep the next token.
1420   if (FormatTok->Tok.is(tok::colon))
1421     nextToken();
1422   addUnwrappedLine();
1423 }
1424 
parseEnum()1425 void UnwrappedLineParser::parseEnum() {
1426   // Won't be 'enum' for NS_ENUMs.
1427   if (FormatTok->Tok.is(tok::kw_enum))
1428     nextToken();
1429 
1430   // Eat up enum class ...
1431   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1432     nextToken();
1433   while (FormatTok->Tok.getIdentifierInfo() ||
1434          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1435                             tok::greater, tok::comma, tok::question)) {
1436     nextToken();
1437     // We can have macros or attributes in between 'enum' and the enum name.
1438     if (FormatTok->is(tok::l_paren))
1439       parseParens();
1440     if (FormatTok->is(tok::identifier))
1441       nextToken();
1442   }
1443 
1444   // Just a declaration or something is wrong.
1445   if (FormatTok->isNot(tok::l_brace))
1446     return;
1447   FormatTok->BlockKind = BK_Block;
1448 
1449   if (Style.Language == FormatStyle::LK_Java) {
1450     // Java enums are different.
1451     parseJavaEnumBody();
1452     return;
1453   }
1454 
1455   // Parse enum body.
1456   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1457   if (HasError) {
1458     if (FormatTok->is(tok::semi))
1459       nextToken();
1460     addUnwrappedLine();
1461   }
1462 
1463   // We fall through to parsing a structural element afterwards, so that in
1464   // enum A {} n, m;
1465   // "} n, m;" will end up in one unwrapped line.
1466 }
1467 
parseJavaEnumBody()1468 void UnwrappedLineParser::parseJavaEnumBody() {
1469   // Determine whether the enum is simple, i.e. does not have a semicolon or
1470   // constants with class bodies. Simple enums can be formatted like braced
1471   // lists, contracted to a single line, etc.
1472   unsigned StoredPosition = Tokens->getPosition();
1473   bool IsSimple = true;
1474   FormatToken *Tok = Tokens->getNextToken();
1475   while (Tok) {
1476     if (Tok->is(tok::r_brace))
1477       break;
1478     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1479       IsSimple = false;
1480       break;
1481     }
1482     // FIXME: This will also mark enums with braces in the arguments to enum
1483     // constants as "not simple". This is probably fine in practice, though.
1484     Tok = Tokens->getNextToken();
1485   }
1486   FormatTok = Tokens->setPosition(StoredPosition);
1487 
1488   if (IsSimple) {
1489     parseBracedList();
1490     addUnwrappedLine();
1491     return;
1492   }
1493 
1494   // Parse the body of a more complex enum.
1495   // First add a line for everything up to the "{".
1496   nextToken();
1497   addUnwrappedLine();
1498   ++Line->Level;
1499 
1500   // Parse the enum constants.
1501   while (FormatTok) {
1502     if (FormatTok->is(tok::l_brace)) {
1503       // Parse the constant's class body.
1504       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1505                  /*MunchSemi=*/false);
1506     } else if (FormatTok->is(tok::l_paren)) {
1507       parseParens();
1508     } else if (FormatTok->is(tok::comma)) {
1509       nextToken();
1510       addUnwrappedLine();
1511     } else if (FormatTok->is(tok::semi)) {
1512       nextToken();
1513       addUnwrappedLine();
1514       break;
1515     } else if (FormatTok->is(tok::r_brace)) {
1516       addUnwrappedLine();
1517       break;
1518     } else {
1519       nextToken();
1520     }
1521   }
1522 
1523   // Parse the class body after the enum's ";" if any.
1524   parseLevel(/*HasOpeningBrace=*/true);
1525   nextToken();
1526   --Line->Level;
1527   addUnwrappedLine();
1528 }
1529 
parseRecord()1530 void UnwrappedLineParser::parseRecord() {
1531   const FormatToken &InitialToken = *FormatTok;
1532   nextToken();
1533   if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute,
1534                          tok::kw___declspec, tok::kw_alignas)) {
1535     nextToken();
1536     // We can have macros or attributes in between 'class' and the class name.
1537     if (FormatTok->Tok.is(tok::l_paren)) {
1538       parseParens();
1539     }
1540     // The actual identifier can be a nested name specifier, and in macros
1541     // it is often token-pasted.
1542     while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) ||
1543            FormatTok->is(tok::hashhash) ||
1544            ((Style.Language == FormatStyle::LK_Java ||
1545              Style.Language == FormatStyle::LK_JavaScript) &&
1546             FormatTok->isOneOf(tok::period, tok::comma)))
1547       nextToken();
1548 
1549     // Note that parsing away template declarations here leads to incorrectly
1550     // accepting function declarations as record declarations.
1551     // In general, we cannot solve this problem. Consider:
1552     // class A<int> B() {}
1553     // which can be a function definition or a class definition when B() is a
1554     // macro. If we find enough real-world cases where this is a problem, we
1555     // can parse for the 'template' keyword in the beginning of the statement,
1556     // and thus rule out the record production in case there is no template
1557     // (this would still leave us with an ambiguity between template function
1558     // and class declarations).
1559     if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1560       while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1561         if (FormatTok->Tok.is(tok::semi))
1562           return;
1563         nextToken();
1564       }
1565     }
1566   }
1567   if (FormatTok->Tok.is(tok::l_brace)) {
1568     if (ShouldBreakBeforeBrace(Style, InitialToken))
1569       addUnwrappedLine();
1570 
1571     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1572                /*MunchSemi=*/false);
1573   }
1574   // We fall through to parsing a structural element afterwards, so
1575   // class A {} n, m;
1576   // will end up in one unwrapped line.
1577   // This does not apply for Java.
1578   if (Style.Language == FormatStyle::LK_Java ||
1579       Style.Language == FormatStyle::LK_JavaScript)
1580     addUnwrappedLine();
1581 }
1582 
parseObjCProtocolList()1583 void UnwrappedLineParser::parseObjCProtocolList() {
1584   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1585   do
1586     nextToken();
1587   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1588   nextToken(); // Skip '>'.
1589 }
1590 
parseObjCUntilAtEnd()1591 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1592   do {
1593     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1594       nextToken();
1595       addUnwrappedLine();
1596       break;
1597     }
1598     if (FormatTok->is(tok::l_brace)) {
1599       parseBlock(/*MustBeDeclaration=*/false);
1600       // In ObjC interfaces, nothing should be following the "}".
1601       addUnwrappedLine();
1602     } else if (FormatTok->is(tok::r_brace)) {
1603       // Ignore stray "}". parseStructuralElement doesn't consume them.
1604       nextToken();
1605       addUnwrappedLine();
1606     } else {
1607       parseStructuralElement();
1608     }
1609   } while (!eof());
1610 }
1611 
parseObjCInterfaceOrImplementation()1612 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1613   nextToken();
1614   nextToken(); // interface name
1615 
1616   // @interface can be followed by either a base class, or a category.
1617   if (FormatTok->Tok.is(tok::colon)) {
1618     nextToken();
1619     nextToken(); // base class name
1620   } else if (FormatTok->Tok.is(tok::l_paren))
1621     // Skip category, if present.
1622     parseParens();
1623 
1624   if (FormatTok->Tok.is(tok::less))
1625     parseObjCProtocolList();
1626 
1627   if (FormatTok->Tok.is(tok::l_brace)) {
1628     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1629         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1630       addUnwrappedLine();
1631     parseBlock(/*MustBeDeclaration=*/true);
1632   }
1633 
1634   // With instance variables, this puts '}' on its own line.  Without instance
1635   // variables, this ends the @interface line.
1636   addUnwrappedLine();
1637 
1638   parseObjCUntilAtEnd();
1639 }
1640 
parseObjCProtocol()1641 void UnwrappedLineParser::parseObjCProtocol() {
1642   nextToken();
1643   nextToken(); // protocol name
1644 
1645   if (FormatTok->Tok.is(tok::less))
1646     parseObjCProtocolList();
1647 
1648   // Check for protocol declaration.
1649   if (FormatTok->Tok.is(tok::semi)) {
1650     nextToken();
1651     return addUnwrappedLine();
1652   }
1653 
1654   addUnwrappedLine();
1655   parseObjCUntilAtEnd();
1656 }
1657 
parseJavaScriptEs6ImportExport()1658 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1659   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1660   nextToken();
1661 
1662   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_function,
1663                          Keywords.kw_var))
1664     return; // Fall through to parsing the corresponding structure.
1665 
1666   if (FormatTok->is(tok::kw_default)) {
1667     nextToken(); // export default ..., fall through after eating 'default'.
1668     return;
1669   }
1670 
1671   if (FormatTok->is(tok::l_brace)) {
1672     FormatTok->BlockKind = BK_Block;
1673     parseBracedList();
1674   }
1675 
1676   while (!eof() && FormatTok->isNot(tok::semi) &&
1677          FormatTok->isNot(tok::l_brace)) {
1678     nextToken();
1679   }
1680 }
1681 
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")1682 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1683                                                  StringRef Prefix = "") {
1684   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1685                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1686   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1687                                                     E = Line.Tokens.end();
1688        I != E; ++I) {
1689     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1690   }
1691   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1692                                                     E = Line.Tokens.end();
1693        I != E; ++I) {
1694     const UnwrappedLineNode &Node = *I;
1695     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1696              I = Node.Children.begin(),
1697              E = Node.Children.end();
1698          I != E; ++I) {
1699       printDebugInfo(*I, "\nChild: ");
1700     }
1701   }
1702   llvm::dbgs() << "\n";
1703 }
1704 
addUnwrappedLine()1705 void UnwrappedLineParser::addUnwrappedLine() {
1706   if (Line->Tokens.empty())
1707     return;
1708   DEBUG({
1709     if (CurrentLines == &Lines)
1710       printDebugInfo(*Line);
1711   });
1712   CurrentLines->push_back(*Line);
1713   Line->Tokens.clear();
1714   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1715     for (SmallVectorImpl<UnwrappedLine>::iterator
1716              I = PreprocessorDirectives.begin(),
1717              E = PreprocessorDirectives.end();
1718          I != E; ++I) {
1719       CurrentLines->push_back(*I);
1720     }
1721     PreprocessorDirectives.clear();
1722   }
1723 }
1724 
eof() const1725 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1726 
isOnNewLine(const FormatToken & FormatTok)1727 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1728   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1729          FormatTok.NewlinesBefore > 0;
1730 }
1731 
flushComments(bool NewlineBeforeNext)1732 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1733   bool JustComments = Line->Tokens.empty();
1734   for (SmallVectorImpl<FormatToken *>::const_iterator
1735            I = CommentsBeforeNextToken.begin(),
1736            E = CommentsBeforeNextToken.end();
1737        I != E; ++I) {
1738     if (isOnNewLine(**I) && JustComments) {
1739       addUnwrappedLine();
1740     }
1741     pushToken(*I);
1742   }
1743   if (NewlineBeforeNext && JustComments) {
1744     addUnwrappedLine();
1745   }
1746   CommentsBeforeNextToken.clear();
1747 }
1748 
nextToken()1749 void UnwrappedLineParser::nextToken() {
1750   if (eof())
1751     return;
1752   flushComments(isOnNewLine(*FormatTok));
1753   pushToken(FormatTok);
1754   readToken();
1755 }
1756 
readToken()1757 void UnwrappedLineParser::readToken() {
1758   bool CommentsInCurrentLine = true;
1759   do {
1760     FormatTok = Tokens->getNextToken();
1761     assert(FormatTok);
1762     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1763            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1764       // If there is an unfinished unwrapped line, we flush the preprocessor
1765       // directives only after that unwrapped line was finished later.
1766       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1767       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1768       // Comments stored before the preprocessor directive need to be output
1769       // before the preprocessor directive, at the same level as the
1770       // preprocessor directive, as we consider them to apply to the directive.
1771       flushComments(isOnNewLine(*FormatTok));
1772       parsePPDirective();
1773     }
1774     while (FormatTok->Type == TT_ConflictStart ||
1775            FormatTok->Type == TT_ConflictEnd ||
1776            FormatTok->Type == TT_ConflictAlternative) {
1777       if (FormatTok->Type == TT_ConflictStart) {
1778         conditionalCompilationStart(/*Unreachable=*/false);
1779       } else if (FormatTok->Type == TT_ConflictAlternative) {
1780         conditionalCompilationAlternative();
1781       } else if (FormatTok->Type == TT_ConflictEnd) {
1782         conditionalCompilationEnd();
1783       }
1784       FormatTok = Tokens->getNextToken();
1785       FormatTok->MustBreakBefore = true;
1786     }
1787 
1788     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1789         !Line->InPPDirective) {
1790       continue;
1791     }
1792 
1793     if (!FormatTok->Tok.is(tok::comment))
1794       return;
1795     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1796       CommentsInCurrentLine = false;
1797     }
1798     if (CommentsInCurrentLine) {
1799       pushToken(FormatTok);
1800     } else {
1801       CommentsBeforeNextToken.push_back(FormatTok);
1802     }
1803   } while (!eof());
1804 }
1805 
pushToken(FormatToken * Tok)1806 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1807   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1808   if (MustBreakBeforeNextToken) {
1809     Line->Tokens.back().Tok->MustBreakBefore = true;
1810     MustBreakBeforeNextToken = false;
1811   }
1812 }
1813 
1814 } // end namespace format
1815 } // end namespace clang
1816