1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
~FormatTokenSource()28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
~ScopedDeclarationState()45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
~ScopedMacroState()70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
getNextToken()77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
getPosition()87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
setPosition(unsigned Position)89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
eof()95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
getFakeEOF()97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
~ScopedLineState()134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
~CompoundStatementIndenter()162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
getNextToken()176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
getPosition()181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
setPosition(unsigned P)186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
reset()191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
206       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
207 
reset()208 void UnwrappedLineParser::reset() {
209   PPBranchLevel = -1;
210   Line.reset(new UnwrappedLine);
211   CommentsBeforeNextToken.clear();
212   FormatTok = nullptr;
213   MustBreakBeforeNextToken = false;
214   PreprocessorDirectives.clear();
215   CurrentLines = &Lines;
216   DeclarationScopeStack.clear();
217   PPStack.clear();
218 }
219 
parse()220 void UnwrappedLineParser::parse() {
221   IndexedTokenSource TokenSource(AllTokens);
222   do {
223     DEBUG(llvm::dbgs() << "----\n");
224     reset();
225     Tokens = &TokenSource;
226     TokenSource.reset();
227 
228     readToken();
229     parseFile();
230     // Create line with eof token.
231     pushToken(FormatTok);
232     addUnwrappedLine();
233 
234     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
235                                                   E = Lines.end();
236          I != E; ++I) {
237       Callback.consumeUnwrappedLine(*I);
238     }
239     Callback.finishRun();
240     Lines.clear();
241     while (!PPLevelBranchIndex.empty() &&
242            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
243       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
244       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
245     }
246     if (!PPLevelBranchIndex.empty()) {
247       ++PPLevelBranchIndex.back();
248       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
249       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
250     }
251   } while (!PPLevelBranchIndex.empty());
252 }
253 
parseFile()254 void UnwrappedLineParser::parseFile() {
255   // The top-level context in a file always has declarations, except for pre-
256   // processor directives and JavaScript files.
257   bool MustBeDeclaration =
258       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
259   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
260                                           MustBeDeclaration);
261   parseLevel(/*HasOpeningBrace=*/false);
262   // Make sure to format the remaining tokens.
263   flushComments(true);
264   addUnwrappedLine();
265 }
266 
parseLevel(bool HasOpeningBrace)267 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
268   bool SwitchLabelEncountered = false;
269   do {
270     tok::TokenKind kind = FormatTok->Tok.getKind();
271     if (FormatTok->Type == TT_MacroBlockBegin) {
272       kind = tok::l_brace;
273     } else if (FormatTok->Type == TT_MacroBlockEnd) {
274       kind = tok::r_brace;
275     }
276 
277     switch (kind) {
278     case tok::comment:
279       nextToken();
280       addUnwrappedLine();
281       break;
282     case tok::l_brace:
283       // FIXME: Add parameter whether this can happen - if this happens, we must
284       // be in a non-declaration context.
285       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
286         continue;
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
calculateBraceTypes(bool ExpectClassBody)311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   // Keep a stack of positions of lbrace tokens. We will
319   // update information about whether an lbrace starts a
320   // braced init list or a different block during the loop.
321   SmallVector<FormatToken *, 8> LBraceStack;
322   assert(Tok->Tok.is(tok::l_brace));
323   do {
324     // Get next non-comment token.
325     FormatToken *NextTok;
326     unsigned ReadTokens = 0;
327     do {
328       NextTok = Tokens->getNextToken();
329       ++ReadTokens;
330     } while (NextTok->is(tok::comment));
331 
332     switch (Tok->Tok.getKind()) {
333     case tok::l_brace:
334       Tok->BlockKind = BK_Unknown;
335       LBraceStack.push_back(Tok);
336       break;
337     case tok::r_brace:
338       if (!LBraceStack.empty()) {
339         if (LBraceStack.back()->BlockKind == BK_Unknown) {
340           bool ProbablyBracedList = false;
341           if (Style.Language == FormatStyle::LK_Proto) {
342             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
343           } else {
344             // Using OriginalColumn to distinguish between ObjC methods and
345             // binary operators is a bit hacky.
346             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
347                                     NextTok->OriginalColumn == 0;
348 
349             // If there is a comma, semicolon or right paren after the closing
350             // brace, we assume this is a braced initializer list.  Note that
351             // regardless how we mark inner braces here, we will overwrite the
352             // BlockKind later if we parse a braced list (where all blocks
353             // inside are by default braced lists), or when we explicitly detect
354             // blocks (for example while parsing lambdas).
355             //
356             // We exclude + and - as they can be ObjC visibility modifiers.
357             ProbablyBracedList =
358                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
359                                  tok::r_paren, tok::r_square, tok::l_brace,
360                                  tok::l_paren, tok::ellipsis) ||
361                 (NextTok->is(tok::semi) &&
362                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
363                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
364           }
365           if (ProbablyBracedList) {
366             Tok->BlockKind = BK_BracedInit;
367             LBraceStack.back()->BlockKind = BK_BracedInit;
368           } else {
369             Tok->BlockKind = BK_Block;
370             LBraceStack.back()->BlockKind = BK_Block;
371           }
372         }
373         LBraceStack.pop_back();
374       }
375       break;
376     case tok::at:
377     case tok::semi:
378     case tok::kw_if:
379     case tok::kw_while:
380     case tok::kw_for:
381     case tok::kw_switch:
382     case tok::kw_try:
383     case tok::kw___try:
384       if (!LBraceStack.empty())
385         LBraceStack.back()->BlockKind = BK_Block;
386       break;
387     default:
388       break;
389     }
390     Tok = NextTok;
391   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
392   // Assume other blocks for all unclosed opening braces.
393   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
394     if (LBraceStack[i]->BlockKind == BK_Unknown)
395       LBraceStack[i]->BlockKind = BK_Block;
396   }
397 
398   FormatTok = Tokens->setPosition(StoredPosition);
399 }
400 
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
402                                      bool MunchSemi) {
403   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
404          "'{' or macro block token expected");
405   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
406   FormatTok->BlockKind = BK_Block;
407 
408   unsigned InitialLevel = Line->Level;
409   nextToken();
410 
411   if (MacroBlock && FormatTok->is(tok::l_paren))
412     parseParens();
413 
414   addUnwrappedLine();
415 
416   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
417                                           MustBeDeclaration);
418   if (AddLevel)
419     ++Line->Level;
420   parseLevel(/*HasOpeningBrace=*/true);
421 
422   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
423                  : !FormatTok->is(tok::r_brace)) {
424     Line->Level = InitialLevel;
425     FormatTok->BlockKind = BK_Block;
426     return;
427   }
428 
429   nextToken(); // Munch the closing brace.
430 
431   if (MacroBlock && FormatTok->is(tok::l_paren))
432     parseParens();
433 
434   if (MunchSemi && FormatTok->Tok.is(tok::semi))
435     nextToken();
436   Line->Level = InitialLevel;
437 }
438 
isGoogScope(const UnwrappedLine & Line)439 static bool isGoogScope(const UnwrappedLine &Line) {
440   // FIXME: Closure-library specific stuff should not be hard-coded but be
441   // configurable.
442   if (Line.Tokens.size() < 4)
443     return false;
444   auto I = Line.Tokens.begin();
445   if (I->Tok->TokenText != "goog")
446     return false;
447   ++I;
448   if (I->Tok->isNot(tok::period))
449     return false;
450   ++I;
451   if (I->Tok->TokenText != "scope")
452     return false;
453   ++I;
454   return I->Tok->is(tok::l_paren);
455 }
456 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)457 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
458                                    const FormatToken &InitialToken) {
459   if (InitialToken.is(tok::kw_namespace))
460     return Style.BraceWrapping.AfterNamespace;
461   if (InitialToken.is(tok::kw_class))
462     return Style.BraceWrapping.AfterClass;
463   if (InitialToken.is(tok::kw_union))
464     return Style.BraceWrapping.AfterUnion;
465   if (InitialToken.is(tok::kw_struct))
466     return Style.BraceWrapping.AfterStruct;
467   return false;
468 }
469 
parseChildBlock()470 void UnwrappedLineParser::parseChildBlock() {
471   FormatTok->BlockKind = BK_Block;
472   nextToken();
473   {
474     bool GoogScope =
475         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
476     ScopedLineState LineState(*this);
477     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
478                                             /*MustBeDeclaration=*/false);
479     Line->Level += GoogScope ? 0 : 1;
480     parseLevel(/*HasOpeningBrace=*/true);
481     flushComments(isOnNewLine(*FormatTok));
482     Line->Level -= GoogScope ? 0 : 1;
483   }
484   nextToken();
485 }
486 
parsePPDirective()487 void UnwrappedLineParser::parsePPDirective() {
488   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
489   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
490   nextToken();
491 
492   if (!FormatTok->Tok.getIdentifierInfo()) {
493     parsePPUnknown();
494     return;
495   }
496 
497   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
498   case tok::pp_define:
499     parsePPDefine();
500     return;
501   case tok::pp_if:
502     parsePPIf(/*IfDef=*/false);
503     break;
504   case tok::pp_ifdef:
505   case tok::pp_ifndef:
506     parsePPIf(/*IfDef=*/true);
507     break;
508   case tok::pp_else:
509     parsePPElse();
510     break;
511   case tok::pp_elif:
512     parsePPElIf();
513     break;
514   case tok::pp_endif:
515     parsePPEndIf();
516     break;
517   default:
518     parsePPUnknown();
519     break;
520   }
521 }
522 
conditionalCompilationCondition(bool Unreachable)523 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
524   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
525     PPStack.push_back(PP_Unreachable);
526   else
527     PPStack.push_back(PP_Conditional);
528 }
529 
conditionalCompilationStart(bool Unreachable)530 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
531   ++PPBranchLevel;
532   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
533   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
534     PPLevelBranchIndex.push_back(0);
535     PPLevelBranchCount.push_back(0);
536   }
537   PPChainBranchIndex.push(0);
538   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
539   conditionalCompilationCondition(Unreachable || Skip);
540 }
541 
conditionalCompilationAlternative()542 void UnwrappedLineParser::conditionalCompilationAlternative() {
543   if (!PPStack.empty())
544     PPStack.pop_back();
545   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
546   if (!PPChainBranchIndex.empty())
547     ++PPChainBranchIndex.top();
548   conditionalCompilationCondition(
549       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
550       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
551 }
552 
conditionalCompilationEnd()553 void UnwrappedLineParser::conditionalCompilationEnd() {
554   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
555   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
556     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
557       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
558     }
559   }
560   // Guard against #endif's without #if.
561   if (PPBranchLevel > 0)
562     --PPBranchLevel;
563   if (!PPChainBranchIndex.empty())
564     PPChainBranchIndex.pop();
565   if (!PPStack.empty())
566     PPStack.pop_back();
567 }
568 
parsePPIf(bool IfDef)569 void UnwrappedLineParser::parsePPIf(bool IfDef) {
570   nextToken();
571   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
572                          FormatTok->Tok.getLiteralData() != nullptr &&
573                          StringRef(FormatTok->Tok.getLiteralData(),
574                                    FormatTok->Tok.getLength()) == "0") ||
575                         FormatTok->Tok.is(tok::kw_false);
576   conditionalCompilationStart(!IfDef && IsLiteralFalse);
577   parsePPUnknown();
578 }
579 
parsePPElse()580 void UnwrappedLineParser::parsePPElse() {
581   conditionalCompilationAlternative();
582   parsePPUnknown();
583 }
584 
parsePPElIf()585 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
586 
parsePPEndIf()587 void UnwrappedLineParser::parsePPEndIf() {
588   conditionalCompilationEnd();
589   parsePPUnknown();
590 }
591 
parsePPDefine()592 void UnwrappedLineParser::parsePPDefine() {
593   nextToken();
594 
595   if (FormatTok->Tok.getKind() != tok::identifier) {
596     parsePPUnknown();
597     return;
598   }
599   nextToken();
600   if (FormatTok->Tok.getKind() == tok::l_paren &&
601       FormatTok->WhitespaceRange.getBegin() ==
602           FormatTok->WhitespaceRange.getEnd()) {
603     parseParens();
604   }
605   addUnwrappedLine();
606   Line->Level = 1;
607 
608   // Errors during a preprocessor directive can only affect the layout of the
609   // preprocessor directive, and thus we ignore them. An alternative approach
610   // would be to use the same approach we use on the file level (no
611   // re-indentation if there was a structural error) within the macro
612   // definition.
613   parseFile();
614 }
615 
parsePPUnknown()616 void UnwrappedLineParser::parsePPUnknown() {
617   do {
618     nextToken();
619   } while (!eof());
620   addUnwrappedLine();
621 }
622 
623 // Here we blacklist certain tokens that are not usually the first token in an
624 // unwrapped line. This is used in attempt to distinguish macro calls without
625 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const clang::Token & Tok)626 static bool tokenCanStartNewLine(const clang::Token &Tok) {
627   // Semicolon can be a null-statement, l_square can be a start of a macro or
628   // a C++11 attribute, but this doesn't seem to be common.
629   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
630          Tok.isNot(tok::l_square) &&
631          // Tokens that can only be used as binary operators and a part of
632          // overloaded operator names.
633          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
634          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
635          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
636          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
637          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
638          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
639          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
640          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
641          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
642          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
643          Tok.isNot(tok::lesslessequal) &&
644          // Colon is used in labels, base class lists, initializer lists,
645          // range-based for loops, ternary operator, but should never be the
646          // first token in an unwrapped line.
647          Tok.isNot(tok::colon) &&
648          // 'noexcept' is a trailing annotation.
649          Tok.isNot(tok::kw_noexcept);
650 }
651 
parseStructuralElement()652 void UnwrappedLineParser::parseStructuralElement() {
653   assert(!FormatTok->Tok.is(tok::l_brace));
654   switch (FormatTok->Tok.getKind()) {
655   case tok::at:
656     nextToken();
657     if (FormatTok->Tok.is(tok::l_brace)) {
658       parseBracedList();
659       break;
660     }
661     switch (FormatTok->Tok.getObjCKeywordID()) {
662     case tok::objc_public:
663     case tok::objc_protected:
664     case tok::objc_package:
665     case tok::objc_private:
666       return parseAccessSpecifier();
667     case tok::objc_interface:
668     case tok::objc_implementation:
669       return parseObjCInterfaceOrImplementation();
670     case tok::objc_protocol:
671       return parseObjCProtocol();
672     case tok::objc_end:
673       return; // Handled by the caller.
674     case tok::objc_optional:
675     case tok::objc_required:
676       nextToken();
677       addUnwrappedLine();
678       return;
679     case tok::objc_autoreleasepool:
680       nextToken();
681       if (FormatTok->Tok.is(tok::l_brace)) {
682         if (Style.BraceWrapping.AfterObjCDeclaration)
683           addUnwrappedLine();
684         parseBlock(/*MustBeDeclaration=*/false);
685       }
686       addUnwrappedLine();
687       return;
688     case tok::objc_try:
689       // This branch isn't strictly necessary (the kw_try case below would
690       // do this too after the tok::at is parsed above).  But be explicit.
691       parseTryCatch();
692       return;
693     default:
694       break;
695     }
696     break;
697   case tok::kw_asm:
698     nextToken();
699     if (FormatTok->is(tok::l_brace)) {
700       FormatTok->Type = TT_InlineASMBrace;
701       nextToken();
702       while (FormatTok && FormatTok->isNot(tok::eof)) {
703         if (FormatTok->is(tok::r_brace)) {
704           FormatTok->Type = TT_InlineASMBrace;
705           nextToken();
706           addUnwrappedLine();
707           break;
708         }
709         FormatTok->Finalized = true;
710         nextToken();
711       }
712     }
713     break;
714   case tok::kw_namespace:
715     parseNamespace();
716     return;
717   case tok::kw_inline:
718     nextToken();
719     if (FormatTok->Tok.is(tok::kw_namespace)) {
720       parseNamespace();
721       return;
722     }
723     break;
724   case tok::kw_public:
725   case tok::kw_protected:
726   case tok::kw_private:
727     if (Style.Language == FormatStyle::LK_Java ||
728         Style.Language == FormatStyle::LK_JavaScript)
729       nextToken();
730     else
731       parseAccessSpecifier();
732     return;
733   case tok::kw_if:
734     parseIfThenElse();
735     return;
736   case tok::kw_for:
737   case tok::kw_while:
738     parseForOrWhileLoop();
739     return;
740   case tok::kw_do:
741     parseDoWhile();
742     return;
743   case tok::kw_switch:
744     parseSwitch();
745     return;
746   case tok::kw_default:
747     nextToken();
748     parseLabel();
749     return;
750   case tok::kw_case:
751     parseCaseLabel();
752     return;
753   case tok::kw_try:
754   case tok::kw___try:
755     parseTryCatch();
756     return;
757   case tok::kw_extern:
758     nextToken();
759     if (FormatTok->Tok.is(tok::string_literal)) {
760       nextToken();
761       if (FormatTok->Tok.is(tok::l_brace)) {
762         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
763         addUnwrappedLine();
764         return;
765       }
766     }
767     break;
768   case tok::kw_export:
769     if (Style.Language == FormatStyle::LK_JavaScript) {
770       parseJavaScriptEs6ImportExport();
771       return;
772     }
773     break;
774   case tok::identifier:
775     if (FormatTok->is(TT_ForEachMacro)) {
776       parseForOrWhileLoop();
777       return;
778     }
779     if (FormatTok->is(TT_MacroBlockBegin)) {
780       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
781                  /*MunchSemi=*/false);
782       return;
783     }
784     if (Style.Language == FormatStyle::LK_JavaScript &&
785         FormatTok->is(Keywords.kw_import)) {
786       parseJavaScriptEs6ImportExport();
787       return;
788     }
789     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
790                            Keywords.kw_slots, Keywords.kw_qslots)) {
791       nextToken();
792       if (FormatTok->is(tok::colon)) {
793         nextToken();
794         addUnwrappedLine();
795       }
796       return;
797     }
798     // In all other cases, parse the declaration.
799     break;
800   default:
801     break;
802   }
803   do {
804     switch (FormatTok->Tok.getKind()) {
805     case tok::at:
806       nextToken();
807       if (FormatTok->Tok.is(tok::l_brace))
808         parseBracedList();
809       break;
810     case tok::kw_enum:
811       // parseEnum falls through and does not yet add an unwrapped line as an
812       // enum definition can start a structural element.
813       parseEnum();
814       // This only applies for C++.
815       if (Style.Language != FormatStyle::LK_Cpp) {
816         addUnwrappedLine();
817         return;
818       }
819       break;
820     case tok::kw_typedef:
821       nextToken();
822       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
823                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
824         parseEnum();
825       break;
826     case tok::kw_struct:
827     case tok::kw_union:
828     case tok::kw_class:
829       // parseRecord falls through and does not yet add an unwrapped line as a
830       // record declaration or definition can start a structural element.
831       parseRecord();
832       // This does not apply for Java and JavaScript.
833       if (Style.Language == FormatStyle::LK_Java ||
834           Style.Language == FormatStyle::LK_JavaScript) {
835         addUnwrappedLine();
836         return;
837       }
838       break;
839     case tok::period:
840       nextToken();
841       // In Java, classes have an implicit static member "class".
842       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
843           FormatTok->is(tok::kw_class))
844         nextToken();
845       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
846           FormatTok->Tok.getIdentifierInfo())
847         // JavaScript only has pseudo keywords, all keywords are allowed to
848         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
849         nextToken();
850       break;
851     case tok::semi:
852       nextToken();
853       addUnwrappedLine();
854       return;
855     case tok::r_brace:
856       addUnwrappedLine();
857       return;
858     case tok::l_paren:
859       parseParens();
860       break;
861     case tok::kw_operator:
862       nextToken();
863       if (FormatTok->isBinaryOperator())
864         nextToken();
865       break;
866     case tok::caret:
867       nextToken();
868       if (FormatTok->Tok.isAnyIdentifier() ||
869           FormatTok->isSimpleTypeSpecifier())
870         nextToken();
871       if (FormatTok->is(tok::l_paren))
872         parseParens();
873       if (FormatTok->is(tok::l_brace))
874         parseChildBlock();
875       break;
876     case tok::l_brace:
877       if (!tryToParseBracedList()) {
878         // A block outside of parentheses must be the last part of a
879         // structural element.
880         // FIXME: Figure out cases where this is not true, and add projections
881         // for them (the one we know is missing are lambdas).
882         if (Style.BraceWrapping.AfterFunction)
883           addUnwrappedLine();
884         FormatTok->Type = TT_FunctionLBrace;
885         parseBlock(/*MustBeDeclaration=*/false);
886         addUnwrappedLine();
887         return;
888       }
889       // Otherwise this was a braced init list, and the structural
890       // element continues.
891       break;
892     case tok::kw_try:
893       // We arrive here when parsing function-try blocks.
894       parseTryCatch();
895       return;
896     case tok::identifier: {
897       if (FormatTok->is(TT_MacroBlockEnd)) {
898         addUnwrappedLine();
899         return;
900       }
901 
902       // Parse function literal unless 'function' is the first token in a line
903       // in which case this should be treated as a free-standing function.
904       if (Style.Language == FormatStyle::LK_JavaScript &&
905           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
906         tryToParseJSFunction();
907         break;
908       }
909       if ((Style.Language == FormatStyle::LK_JavaScript ||
910            Style.Language == FormatStyle::LK_Java) &&
911           FormatTok->is(Keywords.kw_interface)) {
912         parseRecord();
913         addUnwrappedLine();
914         return;
915       }
916 
917       StringRef Text = FormatTok->TokenText;
918       nextToken();
919       if (Line->Tokens.size() == 1 &&
920           // JS doesn't have macros, and within classes colons indicate fields,
921           // not labels.
922           Style.Language != FormatStyle::LK_JavaScript) {
923         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
924           parseLabel();
925           return;
926         }
927         // Recognize function-like macro usages without trailing semicolon as
928         // well as free-standing macros like Q_OBJECT.
929         bool FunctionLike = FormatTok->is(tok::l_paren);
930         if (FunctionLike)
931           parseParens();
932 
933         bool FollowedByNewline =
934             CommentsBeforeNextToken.empty()
935                 ? FormatTok->NewlinesBefore > 0
936                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
937 
938         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
939             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
940           addUnwrappedLine();
941           return;
942         }
943       }
944       break;
945     }
946     case tok::equal:
947       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
948       // TT_JsFatArrow. The always start an expression or a child block if
949       // followed by a curly.
950       if (FormatTok->is(TT_JsFatArrow)) {
951         nextToken();
952         if (FormatTok->is(tok::l_brace))
953           parseChildBlock();
954         break;
955       }
956 
957       nextToken();
958       if (FormatTok->Tok.is(tok::l_brace)) {
959         parseBracedList();
960       }
961       break;
962     case tok::l_square:
963       parseSquare();
964       break;
965     case tok::kw_new:
966       parseNew();
967       break;
968     default:
969       nextToken();
970       break;
971     }
972   } while (!eof());
973 }
974 
tryToParseLambda()975 bool UnwrappedLineParser::tryToParseLambda() {
976   if (Style.Language != FormatStyle::LK_Cpp) {
977     nextToken();
978     return false;
979   }
980   // FIXME: This is a dirty way to access the previous token. Find a better
981   // solution.
982   if (!Line->Tokens.empty() &&
983       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
984                                         tok::kw_new, tok::kw_delete) ||
985        Line->Tokens.back().Tok->closesScope() ||
986        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
987     nextToken();
988     return false;
989   }
990   assert(FormatTok->is(tok::l_square));
991   FormatToken &LSquare = *FormatTok;
992   if (!tryToParseLambdaIntroducer())
993     return false;
994 
995   while (FormatTok->isNot(tok::l_brace)) {
996     if (FormatTok->isSimpleTypeSpecifier()) {
997       nextToken();
998       continue;
999     }
1000     switch (FormatTok->Tok.getKind()) {
1001     case tok::l_brace:
1002       break;
1003     case tok::l_paren:
1004       parseParens();
1005       break;
1006     case tok::amp:
1007     case tok::star:
1008     case tok::kw_const:
1009     case tok::comma:
1010     case tok::less:
1011     case tok::greater:
1012     case tok::identifier:
1013     case tok::numeric_constant:
1014     case tok::coloncolon:
1015     case tok::kw_mutable:
1016       nextToken();
1017       break;
1018     case tok::arrow:
1019       FormatTok->Type = TT_LambdaArrow;
1020       nextToken();
1021       break;
1022     default:
1023       return true;
1024     }
1025   }
1026   LSquare.Type = TT_LambdaLSquare;
1027   parseChildBlock();
1028   return true;
1029 }
1030 
tryToParseLambdaIntroducer()1031 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1032   nextToken();
1033   if (FormatTok->is(tok::equal)) {
1034     nextToken();
1035     if (FormatTok->is(tok::r_square)) {
1036       nextToken();
1037       return true;
1038     }
1039     if (FormatTok->isNot(tok::comma))
1040       return false;
1041     nextToken();
1042   } else if (FormatTok->is(tok::amp)) {
1043     nextToken();
1044     if (FormatTok->is(tok::r_square)) {
1045       nextToken();
1046       return true;
1047     }
1048     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1049       return false;
1050     }
1051     if (FormatTok->is(tok::comma))
1052       nextToken();
1053   } else if (FormatTok->is(tok::r_square)) {
1054     nextToken();
1055     return true;
1056   }
1057   do {
1058     if (FormatTok->is(tok::amp))
1059       nextToken();
1060     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1061       return false;
1062     nextToken();
1063     if (FormatTok->is(tok::ellipsis))
1064       nextToken();
1065     if (FormatTok->is(tok::comma)) {
1066       nextToken();
1067     } else if (FormatTok->is(tok::r_square)) {
1068       nextToken();
1069       return true;
1070     } else {
1071       return false;
1072     }
1073   } while (!eof());
1074   return false;
1075 }
1076 
tryToParseJSFunction()1077 void UnwrappedLineParser::tryToParseJSFunction() {
1078   nextToken();
1079 
1080   // Consume function name.
1081   if (FormatTok->is(tok::identifier))
1082     nextToken();
1083 
1084   if (FormatTok->isNot(tok::l_paren))
1085     return;
1086 
1087   // Parse formal parameter list.
1088   parseParens();
1089 
1090   if (FormatTok->is(tok::colon)) {
1091     // Parse a type definition.
1092     nextToken();
1093 
1094     // Eat the type declaration. For braced inline object types, balance braces,
1095     // otherwise just parse until finding an l_brace for the function body.
1096     if (FormatTok->is(tok::l_brace))
1097       tryToParseBracedList();
1098     else
1099       while (FormatTok->isNot(tok::l_brace) && !eof())
1100         nextToken();
1101   }
1102 
1103   parseChildBlock();
1104 }
1105 
tryToParseBracedList()1106 bool UnwrappedLineParser::tryToParseBracedList() {
1107   if (FormatTok->BlockKind == BK_Unknown)
1108     calculateBraceTypes();
1109   assert(FormatTok->BlockKind != BK_Unknown);
1110   if (FormatTok->BlockKind == BK_Block)
1111     return false;
1112   parseBracedList();
1113   return true;
1114 }
1115 
parseBracedList(bool ContinueOnSemicolons)1116 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1117   bool HasError = false;
1118   nextToken();
1119 
1120   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1121   // replace this by using parseAssigmentExpression() inside.
1122   do {
1123     if (Style.Language == FormatStyle::LK_JavaScript) {
1124       if (FormatTok->is(Keywords.kw_function)) {
1125         tryToParseJSFunction();
1126         continue;
1127       }
1128       if (FormatTok->is(TT_JsFatArrow)) {
1129         nextToken();
1130         // Fat arrows can be followed by simple expressions or by child blocks
1131         // in curly braces.
1132         if (FormatTok->is(tok::l_brace)) {
1133           parseChildBlock();
1134           continue;
1135         }
1136       }
1137     }
1138     switch (FormatTok->Tok.getKind()) {
1139     case tok::caret:
1140       nextToken();
1141       if (FormatTok->is(tok::l_brace)) {
1142         parseChildBlock();
1143       }
1144       break;
1145     case tok::l_square:
1146       tryToParseLambda();
1147       break;
1148     case tok::l_brace:
1149       // Assume there are no blocks inside a braced init list apart
1150       // from the ones we explicitly parse out (like lambdas).
1151       FormatTok->BlockKind = BK_BracedInit;
1152       parseBracedList();
1153       break;
1154     case tok::l_paren:
1155       parseParens();
1156       // JavaScript can just have free standing methods and getters/setters in
1157       // object literals. Detect them by a "{" following ")".
1158       if (Style.Language == FormatStyle::LK_JavaScript) {
1159         if (FormatTok->is(tok::l_brace))
1160           parseChildBlock();
1161         break;
1162       }
1163       break;
1164     case tok::r_brace:
1165       nextToken();
1166       return !HasError;
1167     case tok::semi:
1168       HasError = true;
1169       if (!ContinueOnSemicolons)
1170         return !HasError;
1171       nextToken();
1172       break;
1173     case tok::comma:
1174       nextToken();
1175       break;
1176     default:
1177       nextToken();
1178       break;
1179     }
1180   } while (!eof());
1181   return false;
1182 }
1183 
parseParens()1184 void UnwrappedLineParser::parseParens() {
1185   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1186   nextToken();
1187   do {
1188     switch (FormatTok->Tok.getKind()) {
1189     case tok::l_paren:
1190       parseParens();
1191       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1192         parseChildBlock();
1193       break;
1194     case tok::r_paren:
1195       nextToken();
1196       return;
1197     case tok::r_brace:
1198       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1199       return;
1200     case tok::l_square:
1201       tryToParseLambda();
1202       break;
1203     case tok::l_brace:
1204       if (!tryToParseBracedList())
1205         parseChildBlock();
1206       break;
1207     case tok::at:
1208       nextToken();
1209       if (FormatTok->Tok.is(tok::l_brace))
1210         parseBracedList();
1211       break;
1212     case tok::identifier:
1213       if (Style.Language == FormatStyle::LK_JavaScript &&
1214           FormatTok->is(Keywords.kw_function))
1215         tryToParseJSFunction();
1216       else
1217         nextToken();
1218       break;
1219     default:
1220       nextToken();
1221       break;
1222     }
1223   } while (!eof());
1224 }
1225 
parseSquare()1226 void UnwrappedLineParser::parseSquare() {
1227   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1228   if (tryToParseLambda())
1229     return;
1230   do {
1231     switch (FormatTok->Tok.getKind()) {
1232     case tok::l_paren:
1233       parseParens();
1234       break;
1235     case tok::r_square:
1236       nextToken();
1237       return;
1238     case tok::r_brace:
1239       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1240       return;
1241     case tok::l_square:
1242       parseSquare();
1243       break;
1244     case tok::l_brace: {
1245       if (!tryToParseBracedList())
1246         parseChildBlock();
1247       break;
1248     }
1249     case tok::at:
1250       nextToken();
1251       if (FormatTok->Tok.is(tok::l_brace))
1252         parseBracedList();
1253       break;
1254     default:
1255       nextToken();
1256       break;
1257     }
1258   } while (!eof());
1259 }
1260 
parseIfThenElse()1261 void UnwrappedLineParser::parseIfThenElse() {
1262   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1263   nextToken();
1264   if (FormatTok->Tok.is(tok::l_paren))
1265     parseParens();
1266   bool NeedsUnwrappedLine = false;
1267   if (FormatTok->Tok.is(tok::l_brace)) {
1268     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1269     parseBlock(/*MustBeDeclaration=*/false);
1270     if (Style.BraceWrapping.BeforeElse)
1271       addUnwrappedLine();
1272     else
1273       NeedsUnwrappedLine = true;
1274   } else {
1275     addUnwrappedLine();
1276     ++Line->Level;
1277     parseStructuralElement();
1278     --Line->Level;
1279   }
1280   if (FormatTok->Tok.is(tok::kw_else)) {
1281     nextToken();
1282     if (FormatTok->Tok.is(tok::l_brace)) {
1283       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1284       parseBlock(/*MustBeDeclaration=*/false);
1285       addUnwrappedLine();
1286     } else if (FormatTok->Tok.is(tok::kw_if)) {
1287       parseIfThenElse();
1288     } else {
1289       addUnwrappedLine();
1290       ++Line->Level;
1291       parseStructuralElement();
1292       --Line->Level;
1293     }
1294   } else if (NeedsUnwrappedLine) {
1295     addUnwrappedLine();
1296   }
1297 }
1298 
parseTryCatch()1299 void UnwrappedLineParser::parseTryCatch() {
1300   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1301   nextToken();
1302   bool NeedsUnwrappedLine = false;
1303   if (FormatTok->is(tok::colon)) {
1304     // We are in a function try block, what comes is an initializer list.
1305     nextToken();
1306     while (FormatTok->is(tok::identifier)) {
1307       nextToken();
1308       if (FormatTok->is(tok::l_paren))
1309         parseParens();
1310       if (FormatTok->is(tok::comma))
1311         nextToken();
1312     }
1313   }
1314   // Parse try with resource.
1315   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1316     parseParens();
1317   }
1318   if (FormatTok->is(tok::l_brace)) {
1319     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1320     parseBlock(/*MustBeDeclaration=*/false);
1321     if (Style.BraceWrapping.BeforeCatch) {
1322       addUnwrappedLine();
1323     } else {
1324       NeedsUnwrappedLine = true;
1325     }
1326   } else if (!FormatTok->is(tok::kw_catch)) {
1327     // The C++ standard requires a compound-statement after a try.
1328     // If there's none, we try to assume there's a structuralElement
1329     // and try to continue.
1330     addUnwrappedLine();
1331     ++Line->Level;
1332     parseStructuralElement();
1333     --Line->Level;
1334   }
1335   while (1) {
1336     if (FormatTok->is(tok::at))
1337       nextToken();
1338     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1339                              tok::kw___finally) ||
1340           ((Style.Language == FormatStyle::LK_Java ||
1341             Style.Language == FormatStyle::LK_JavaScript) &&
1342            FormatTok->is(Keywords.kw_finally)) ||
1343           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1344            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1345       break;
1346     nextToken();
1347     while (FormatTok->isNot(tok::l_brace)) {
1348       if (FormatTok->is(tok::l_paren)) {
1349         parseParens();
1350         continue;
1351       }
1352       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1353         return;
1354       nextToken();
1355     }
1356     NeedsUnwrappedLine = false;
1357     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1358     parseBlock(/*MustBeDeclaration=*/false);
1359     if (Style.BraceWrapping.BeforeCatch)
1360       addUnwrappedLine();
1361     else
1362       NeedsUnwrappedLine = true;
1363   }
1364   if (NeedsUnwrappedLine)
1365     addUnwrappedLine();
1366 }
1367 
parseNamespace()1368 void UnwrappedLineParser::parseNamespace() {
1369   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1370 
1371   const FormatToken &InitialToken = *FormatTok;
1372   nextToken();
1373   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1374     nextToken();
1375   if (FormatTok->Tok.is(tok::l_brace)) {
1376     if (ShouldBreakBeforeBrace(Style, InitialToken))
1377       addUnwrappedLine();
1378 
1379     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1380                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1381                      DeclarationScopeStack.size() > 1);
1382     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1383     // Munch the semicolon after a namespace. This is more common than one would
1384     // think. Puttin the semicolon into its own line is very ugly.
1385     if (FormatTok->Tok.is(tok::semi))
1386       nextToken();
1387     addUnwrappedLine();
1388   }
1389   // FIXME: Add error handling.
1390 }
1391 
parseNew()1392 void UnwrappedLineParser::parseNew() {
1393   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1394   nextToken();
1395   if (Style.Language != FormatStyle::LK_Java)
1396     return;
1397 
1398   // In Java, we can parse everything up to the parens, which aren't optional.
1399   do {
1400     // There should not be a ;, { or } before the new's open paren.
1401     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1402       return;
1403 
1404     // Consume the parens.
1405     if (FormatTok->is(tok::l_paren)) {
1406       parseParens();
1407 
1408       // If there is a class body of an anonymous class, consume that as child.
1409       if (FormatTok->is(tok::l_brace))
1410         parseChildBlock();
1411       return;
1412     }
1413     nextToken();
1414   } while (!eof());
1415 }
1416 
parseForOrWhileLoop()1417 void UnwrappedLineParser::parseForOrWhileLoop() {
1418   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1419          "'for', 'while' or foreach macro expected");
1420   nextToken();
1421   if (FormatTok->Tok.is(tok::l_paren))
1422     parseParens();
1423   if (FormatTok->Tok.is(tok::l_brace)) {
1424     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1425     parseBlock(/*MustBeDeclaration=*/false);
1426     addUnwrappedLine();
1427   } else {
1428     addUnwrappedLine();
1429     ++Line->Level;
1430     parseStructuralElement();
1431     --Line->Level;
1432   }
1433 }
1434 
parseDoWhile()1435 void UnwrappedLineParser::parseDoWhile() {
1436   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1437   nextToken();
1438   if (FormatTok->Tok.is(tok::l_brace)) {
1439     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1440     parseBlock(/*MustBeDeclaration=*/false);
1441     if (Style.BraceWrapping.IndentBraces)
1442       addUnwrappedLine();
1443   } else {
1444     addUnwrappedLine();
1445     ++Line->Level;
1446     parseStructuralElement();
1447     --Line->Level;
1448   }
1449 
1450   // FIXME: Add error handling.
1451   if (!FormatTok->Tok.is(tok::kw_while)) {
1452     addUnwrappedLine();
1453     return;
1454   }
1455 
1456   nextToken();
1457   parseStructuralElement();
1458 }
1459 
parseLabel()1460 void UnwrappedLineParser::parseLabel() {
1461   nextToken();
1462   unsigned OldLineLevel = Line->Level;
1463   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1464     --Line->Level;
1465   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1466     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1467     parseBlock(/*MustBeDeclaration=*/false);
1468     if (FormatTok->Tok.is(tok::kw_break)) {
1469       if (Style.BraceWrapping.AfterControlStatement)
1470         addUnwrappedLine();
1471       parseStructuralElement();
1472     }
1473     addUnwrappedLine();
1474   } else {
1475     if (FormatTok->is(tok::semi))
1476       nextToken();
1477     addUnwrappedLine();
1478   }
1479   Line->Level = OldLineLevel;
1480 }
1481 
parseCaseLabel()1482 void UnwrappedLineParser::parseCaseLabel() {
1483   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1484   // FIXME: fix handling of complex expressions here.
1485   do {
1486     nextToken();
1487   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1488   parseLabel();
1489 }
1490 
parseSwitch()1491 void UnwrappedLineParser::parseSwitch() {
1492   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1493   nextToken();
1494   if (FormatTok->Tok.is(tok::l_paren))
1495     parseParens();
1496   if (FormatTok->Tok.is(tok::l_brace)) {
1497     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1498     parseBlock(/*MustBeDeclaration=*/false);
1499     addUnwrappedLine();
1500   } else {
1501     addUnwrappedLine();
1502     ++Line->Level;
1503     parseStructuralElement();
1504     --Line->Level;
1505   }
1506 }
1507 
parseAccessSpecifier()1508 void UnwrappedLineParser::parseAccessSpecifier() {
1509   nextToken();
1510   // Understand Qt's slots.
1511   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1512     nextToken();
1513   // Otherwise, we don't know what it is, and we'd better keep the next token.
1514   if (FormatTok->Tok.is(tok::colon))
1515     nextToken();
1516   addUnwrappedLine();
1517 }
1518 
parseEnum()1519 void UnwrappedLineParser::parseEnum() {
1520   // Won't be 'enum' for NS_ENUMs.
1521   if (FormatTok->Tok.is(tok::kw_enum))
1522     nextToken();
1523 
1524   // Eat up enum class ...
1525   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1526     nextToken();
1527 
1528   while (FormatTok->Tok.getIdentifierInfo() ||
1529          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1530                             tok::greater, tok::comma, tok::question)) {
1531     nextToken();
1532     // We can have macros or attributes in between 'enum' and the enum name.
1533     if (FormatTok->is(tok::l_paren))
1534       parseParens();
1535     if (FormatTok->is(tok::identifier)) {
1536       nextToken();
1537       // If there are two identifiers in a row, this is likely an elaborate
1538       // return type. In Java, this can be "implements", etc.
1539       if (Style.Language == FormatStyle::LK_Cpp &&
1540           FormatTok->is(tok::identifier))
1541         return;
1542     }
1543   }
1544 
1545   // Just a declaration or something is wrong.
1546   if (FormatTok->isNot(tok::l_brace))
1547     return;
1548   FormatTok->BlockKind = BK_Block;
1549 
1550   if (Style.Language == FormatStyle::LK_Java) {
1551     // Java enums are different.
1552     parseJavaEnumBody();
1553     return;
1554   } else if (Style.Language == FormatStyle::LK_Proto) {
1555     parseBlock(/*MustBeDeclaration=*/true);
1556     return;
1557   }
1558 
1559   // Parse enum body.
1560   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1561   if (HasError) {
1562     if (FormatTok->is(tok::semi))
1563       nextToken();
1564     addUnwrappedLine();
1565   }
1566 
1567   // There is no addUnwrappedLine() here so that we fall through to parsing a
1568   // structural element afterwards. Thus, in "enum A {} n, m;",
1569   // "} n, m;" will end up in one unwrapped line.
1570 }
1571 
parseJavaEnumBody()1572 void UnwrappedLineParser::parseJavaEnumBody() {
1573   // Determine whether the enum is simple, i.e. does not have a semicolon or
1574   // constants with class bodies. Simple enums can be formatted like braced
1575   // lists, contracted to a single line, etc.
1576   unsigned StoredPosition = Tokens->getPosition();
1577   bool IsSimple = true;
1578   FormatToken *Tok = Tokens->getNextToken();
1579   while (Tok) {
1580     if (Tok->is(tok::r_brace))
1581       break;
1582     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1583       IsSimple = false;
1584       break;
1585     }
1586     // FIXME: This will also mark enums with braces in the arguments to enum
1587     // constants as "not simple". This is probably fine in practice, though.
1588     Tok = Tokens->getNextToken();
1589   }
1590   FormatTok = Tokens->setPosition(StoredPosition);
1591 
1592   if (IsSimple) {
1593     parseBracedList();
1594     addUnwrappedLine();
1595     return;
1596   }
1597 
1598   // Parse the body of a more complex enum.
1599   // First add a line for everything up to the "{".
1600   nextToken();
1601   addUnwrappedLine();
1602   ++Line->Level;
1603 
1604   // Parse the enum constants.
1605   while (FormatTok) {
1606     if (FormatTok->is(tok::l_brace)) {
1607       // Parse the constant's class body.
1608       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1609                  /*MunchSemi=*/false);
1610     } else if (FormatTok->is(tok::l_paren)) {
1611       parseParens();
1612     } else if (FormatTok->is(tok::comma)) {
1613       nextToken();
1614       addUnwrappedLine();
1615     } else if (FormatTok->is(tok::semi)) {
1616       nextToken();
1617       addUnwrappedLine();
1618       break;
1619     } else if (FormatTok->is(tok::r_brace)) {
1620       addUnwrappedLine();
1621       break;
1622     } else {
1623       nextToken();
1624     }
1625   }
1626 
1627   // Parse the class body after the enum's ";" if any.
1628   parseLevel(/*HasOpeningBrace=*/true);
1629   nextToken();
1630   --Line->Level;
1631   addUnwrappedLine();
1632 }
1633 
parseRecord()1634 void UnwrappedLineParser::parseRecord() {
1635   const FormatToken &InitialToken = *FormatTok;
1636   nextToken();
1637 
1638   // The actual identifier can be a nested name specifier, and in macros
1639   // it is often token-pasted.
1640   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1641                             tok::kw___attribute, tok::kw___declspec,
1642                             tok::kw_alignas) ||
1643          ((Style.Language == FormatStyle::LK_Java ||
1644            Style.Language == FormatStyle::LK_JavaScript) &&
1645           FormatTok->isOneOf(tok::period, tok::comma))) {
1646     bool IsNonMacroIdentifier =
1647         FormatTok->is(tok::identifier) &&
1648         FormatTok->TokenText != FormatTok->TokenText.upper();
1649     nextToken();
1650     // We can have macros or attributes in between 'class' and the class name.
1651     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1652       parseParens();
1653   }
1654 
1655   // Note that parsing away template declarations here leads to incorrectly
1656   // accepting function declarations as record declarations.
1657   // In general, we cannot solve this problem. Consider:
1658   // class A<int> B() {}
1659   // which can be a function definition or a class definition when B() is a
1660   // macro. If we find enough real-world cases where this is a problem, we
1661   // can parse for the 'template' keyword in the beginning of the statement,
1662   // and thus rule out the record production in case there is no template
1663   // (this would still leave us with an ambiguity between template function
1664   // and class declarations).
1665   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1666     while (!eof()) {
1667       if (FormatTok->is(tok::l_brace)) {
1668         calculateBraceTypes(/*ExpectClassBody=*/true);
1669         if (!tryToParseBracedList())
1670           break;
1671       }
1672       if (FormatTok->Tok.is(tok::semi))
1673         return;
1674       nextToken();
1675     }
1676   }
1677   if (FormatTok->Tok.is(tok::l_brace)) {
1678     if (ShouldBreakBeforeBrace(Style, InitialToken))
1679       addUnwrappedLine();
1680 
1681     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1682                /*MunchSemi=*/false);
1683   }
1684   // There is no addUnwrappedLine() here so that we fall through to parsing a
1685   // structural element afterwards. Thus, in "class A {} n, m;",
1686   // "} n, m;" will end up in one unwrapped line.
1687 }
1688 
parseObjCProtocolList()1689 void UnwrappedLineParser::parseObjCProtocolList() {
1690   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1691   do
1692     nextToken();
1693   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1694   nextToken(); // Skip '>'.
1695 }
1696 
parseObjCUntilAtEnd()1697 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1698   do {
1699     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1700       nextToken();
1701       addUnwrappedLine();
1702       break;
1703     }
1704     if (FormatTok->is(tok::l_brace)) {
1705       parseBlock(/*MustBeDeclaration=*/false);
1706       // In ObjC interfaces, nothing should be following the "}".
1707       addUnwrappedLine();
1708     } else if (FormatTok->is(tok::r_brace)) {
1709       // Ignore stray "}". parseStructuralElement doesn't consume them.
1710       nextToken();
1711       addUnwrappedLine();
1712     } else {
1713       parseStructuralElement();
1714     }
1715   } while (!eof());
1716 }
1717 
parseObjCInterfaceOrImplementation()1718 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1719   nextToken();
1720   nextToken(); // interface name
1721 
1722   // @interface can be followed by either a base class, or a category.
1723   if (FormatTok->Tok.is(tok::colon)) {
1724     nextToken();
1725     nextToken(); // base class name
1726   } else if (FormatTok->Tok.is(tok::l_paren))
1727     // Skip category, if present.
1728     parseParens();
1729 
1730   if (FormatTok->Tok.is(tok::less))
1731     parseObjCProtocolList();
1732 
1733   if (FormatTok->Tok.is(tok::l_brace)) {
1734     if (Style.BraceWrapping.AfterObjCDeclaration)
1735       addUnwrappedLine();
1736     parseBlock(/*MustBeDeclaration=*/true);
1737   }
1738 
1739   // With instance variables, this puts '}' on its own line.  Without instance
1740   // variables, this ends the @interface line.
1741   addUnwrappedLine();
1742 
1743   parseObjCUntilAtEnd();
1744 }
1745 
parseObjCProtocol()1746 void UnwrappedLineParser::parseObjCProtocol() {
1747   nextToken();
1748   nextToken(); // protocol name
1749 
1750   if (FormatTok->Tok.is(tok::less))
1751     parseObjCProtocolList();
1752 
1753   // Check for protocol declaration.
1754   if (FormatTok->Tok.is(tok::semi)) {
1755     nextToken();
1756     return addUnwrappedLine();
1757   }
1758 
1759   addUnwrappedLine();
1760   parseObjCUntilAtEnd();
1761 }
1762 
parseJavaScriptEs6ImportExport()1763 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1764   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1765   nextToken();
1766 
1767   // Consume the "default" in "export default class/function".
1768   if (FormatTok->is(tok::kw_default))
1769     nextToken();
1770 
1771   // Consume "function" and "default function", so that these get parsed as
1772   // free-standing JS functions, i.e. do not require a trailing semicolon.
1773   if (FormatTok->is(Keywords.kw_function)) {
1774     nextToken();
1775     return;
1776   }
1777 
1778   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1779                          Keywords.kw_let, Keywords.kw_var))
1780     return; // Fall through to parsing the corresponding structure.
1781 
1782   if (FormatTok->is(tok::l_brace)) {
1783     FormatTok->BlockKind = BK_Block;
1784     parseBracedList();
1785   }
1786 
1787   while (!eof() && FormatTok->isNot(tok::semi) &&
1788          FormatTok->isNot(tok::l_brace)) {
1789     nextToken();
1790   }
1791 }
1792 
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")1793 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1794                                                  StringRef Prefix = "") {
1795   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1796                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1797   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1798                                                     E = Line.Tokens.end();
1799        I != E; ++I) {
1800     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1801   }
1802   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1803                                                     E = Line.Tokens.end();
1804        I != E; ++I) {
1805     const UnwrappedLineNode &Node = *I;
1806     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1807              I = Node.Children.begin(),
1808              E = Node.Children.end();
1809          I != E; ++I) {
1810       printDebugInfo(*I, "\nChild: ");
1811     }
1812   }
1813   llvm::dbgs() << "\n";
1814 }
1815 
addUnwrappedLine()1816 void UnwrappedLineParser::addUnwrappedLine() {
1817   if (Line->Tokens.empty())
1818     return;
1819   DEBUG({
1820     if (CurrentLines == &Lines)
1821       printDebugInfo(*Line);
1822   });
1823   CurrentLines->push_back(std::move(*Line));
1824   Line->Tokens.clear();
1825   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1826     CurrentLines->append(
1827         std::make_move_iterator(PreprocessorDirectives.begin()),
1828         std::make_move_iterator(PreprocessorDirectives.end()));
1829     PreprocessorDirectives.clear();
1830   }
1831 }
1832 
eof() const1833 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1834 
isOnNewLine(const FormatToken & FormatTok)1835 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1836   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1837          FormatTok.NewlinesBefore > 0;
1838 }
1839 
flushComments(bool NewlineBeforeNext)1840 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1841   bool JustComments = Line->Tokens.empty();
1842   for (SmallVectorImpl<FormatToken *>::const_iterator
1843            I = CommentsBeforeNextToken.begin(),
1844            E = CommentsBeforeNextToken.end();
1845        I != E; ++I) {
1846     if (isOnNewLine(**I) && JustComments)
1847       addUnwrappedLine();
1848     pushToken(*I);
1849   }
1850   if (NewlineBeforeNext && JustComments)
1851     addUnwrappedLine();
1852   CommentsBeforeNextToken.clear();
1853 }
1854 
nextToken()1855 void UnwrappedLineParser::nextToken() {
1856   if (eof())
1857     return;
1858   flushComments(isOnNewLine(*FormatTok));
1859   pushToken(FormatTok);
1860   readToken();
1861 }
1862 
/// Advances FormatTok to the next token the parser should look at.
///
/// Tokens are pulled from the token source in a loop. Along the way this
/// parses preprocessor directives (parsePPDirective()), consumes
/// conflict-marker tokens (via the conditionalCompilation*() functions), skips
/// tokens while the top of PPStack is PP_Unreachable, and collects comments: a
/// comment is either pushed onto the current line or queued in
/// CommentsBeforeNextToken. The function returns as soon as FormatTok is a
/// non-comment token that is not being skipped; while skipping, the loop ends
/// once eof() is true.
void UnwrappedLineParser::readToken() {
  // True while comments are still attached to the current line. Once a comment
  // is on a new line (or is the first token), it and every later comment seen
  // in this call are queued in CommentsBeforeNextToken instead.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' only starts a directive here when we are not already inside one
    // and the token has an unescaped newline or is the first token. This is a
    // loop because the token in FormatTok after a directive has been parsed
    // may itself start another directive.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict-marker tokens are handled like conditional compilation and are
    // consumed here: each one is replaced by the token that follows it, which
    // is flagged as requiring a break before it.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip this token while in an unreachable branch, unless we are inside a
    // preprocessor directive. The 'continue' re-checks the loop condition, so
    // skipping stops at end of file.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any non-comment token (including the end-of-file token) ends the search.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      // Trailing comment: it becomes part of the current line.
      pushToken(FormatTok);
    } else {
      // Deferred until flushComments() is called.
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1911 
pushToken(FormatToken * Tok)1912 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1913   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1914   if (MustBreakBeforeNextToken) {
1915     Line->Tokens.back().Tok->MustBreakBefore = true;
1916     MustBreakBeforeNextToken = false;
1917   }
1918 }
1919 
1920 } // end namespace format
1921 } // end namespace clang
1922