1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20
21 #define DEBUG_TYPE "format-parser"
22
23 namespace clang {
24 namespace format {
25
26 class FormatTokenSource {
27 public:
~FormatTokenSource()28 virtual ~FormatTokenSource() {}
29 virtual FormatToken *getNextToken() = 0;
30
31 virtual unsigned getPosition() = 0;
32 virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34
35 namespace {
36
37 class ScopedDeclarationState {
38 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40 bool MustBeDeclaration)
41 : Line(Line), Stack(Stack) {
42 Line.MustBeDeclaration = MustBeDeclaration;
43 Stack.push_back(MustBeDeclaration);
44 }
~ScopedDeclarationState()45 ~ScopedDeclarationState() {
46 Stack.pop_back();
47 if (!Stack.empty())
48 Line.MustBeDeclaration = Stack.back();
49 else
50 Line.MustBeDeclaration = true;
51 }
52
53 private:
54 UnwrappedLine &Line;
55 std::vector<bool> &Stack;
56 };
57
58 class ScopedMacroState : public FormatTokenSource {
59 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken,bool & StructuralError)60 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61 FormatToken *&ResetToken, bool &StructuralError)
62 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64 StructuralError(StructuralError),
65 PreviousStructuralError(StructuralError), Token(nullptr) {
66 TokenSource = this;
67 Line.Level = 0;
68 Line.InPPDirective = true;
69 }
70
~ScopedMacroState()71 ~ScopedMacroState() override {
72 TokenSource = PreviousTokenSource;
73 ResetToken = Token;
74 Line.InPPDirective = false;
75 Line.Level = PreviousLineLevel;
76 StructuralError = PreviousStructuralError;
77 }
78
getNextToken()79 FormatToken *getNextToken() override {
80 // The \c UnwrappedLineParser guards against this by never calling
81 // \c getNextToken() after it has encountered the first eof token.
82 assert(!eof());
83 Token = PreviousTokenSource->getNextToken();
84 if (eof())
85 return getFakeEOF();
86 return Token;
87 }
88
getPosition()89 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
90
setPosition(unsigned Position)91 FormatToken *setPosition(unsigned Position) override {
92 Token = PreviousTokenSource->setPosition(Position);
93 return Token;
94 }
95
96 private:
eof()97 bool eof() { return Token && Token->HasUnescapedNewline; }
98
getFakeEOF()99 FormatToken *getFakeEOF() {
100 static bool EOFInitialized = false;
101 static FormatToken FormatTok;
102 if (!EOFInitialized) {
103 FormatTok.Tok.startToken();
104 FormatTok.Tok.setKind(tok::eof);
105 EOFInitialized = true;
106 }
107 return &FormatTok;
108 }
109
110 UnwrappedLine &Line;
111 FormatTokenSource *&TokenSource;
112 FormatToken *&ResetToken;
113 unsigned PreviousLineLevel;
114 FormatTokenSource *PreviousTokenSource;
115 bool &StructuralError;
116 bool PreviousStructuralError;
117
118 FormatToken *Token;
119 };
120
121 } // end anonymous namespace
122
123 class ScopedLineState {
124 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)125 ScopedLineState(UnwrappedLineParser &Parser,
126 bool SwitchToPreprocessorLines = false)
127 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
128 if (SwitchToPreprocessorLines)
129 Parser.CurrentLines = &Parser.PreprocessorDirectives;
130 else if (!Parser.Line->Tokens.empty())
131 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
132 PreBlockLine = std::move(Parser.Line);
133 Parser.Line = llvm::make_unique<UnwrappedLine>();
134 Parser.Line->Level = PreBlockLine->Level;
135 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
136 }
137
~ScopedLineState()138 ~ScopedLineState() {
139 if (!Parser.Line->Tokens.empty()) {
140 Parser.addUnwrappedLine();
141 }
142 assert(Parser.Line->Tokens.empty());
143 Parser.Line = std::move(PreBlockLine);
144 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
145 Parser.MustBreakBeforeNextToken = true;
146 Parser.CurrentLines = OriginalLines;
147 }
148
149 private:
150 UnwrappedLineParser &Parser;
151
152 std::unique_ptr<UnwrappedLine> PreBlockLine;
153 SmallVectorImpl<UnwrappedLine> *OriginalLines;
154 };
155
156 class CompoundStatementIndenter {
157 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)158 CompoundStatementIndenter(UnwrappedLineParser *Parser,
159 const FormatStyle &Style, unsigned &LineLevel)
160 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
161 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
162 Parser->addUnwrappedLine();
163 } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
164 Parser->addUnwrappedLine();
165 ++LineLevel;
166 }
167 }
~CompoundStatementIndenter()168 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
169
170 private:
171 unsigned &LineLevel;
172 unsigned OldLineLevel;
173 };
174
175 namespace {
176
177 class IndexedTokenSource : public FormatTokenSource {
178 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)179 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
180 : Tokens(Tokens), Position(-1) {}
181
getNextToken()182 FormatToken *getNextToken() override {
183 ++Position;
184 return Tokens[Position];
185 }
186
getPosition()187 unsigned getPosition() override {
188 assert(Position >= 0);
189 return Position;
190 }
191
setPosition(unsigned P)192 FormatToken *setPosition(unsigned P) override {
193 Position = P;
194 return Tokens[Position];
195 }
196
reset()197 void reset() { Position = -1; }
198
199 private:
200 ArrayRef<FormatToken *> Tokens;
201 int Position;
202 };
203
204 } // end anonymous namespace
205
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)206 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
207 const AdditionalKeywords &Keywords,
208 ArrayRef<FormatToken *> Tokens,
209 UnwrappedLineConsumer &Callback)
210 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
211 CurrentLines(&Lines), StructuralError(false), Style(Style),
212 Keywords(Keywords), Tokens(nullptr), Callback(Callback),
213 AllTokens(Tokens), PPBranchLevel(-1) {}
214
reset()215 void UnwrappedLineParser::reset() {
216 PPBranchLevel = -1;
217 Line.reset(new UnwrappedLine);
218 CommentsBeforeNextToken.clear();
219 FormatTok = nullptr;
220 MustBreakBeforeNextToken = false;
221 PreprocessorDirectives.clear();
222 CurrentLines = &Lines;
223 DeclarationScopeStack.clear();
224 StructuralError = false;
225 PPStack.clear();
226 }
227
parse()228 bool UnwrappedLineParser::parse() {
229 IndexedTokenSource TokenSource(AllTokens);
230 do {
231 DEBUG(llvm::dbgs() << "----\n");
232 reset();
233 Tokens = &TokenSource;
234 TokenSource.reset();
235
236 readToken();
237 parseFile();
238 // Create line with eof token.
239 pushToken(FormatTok);
240 addUnwrappedLine();
241
242 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
243 E = Lines.end();
244 I != E; ++I) {
245 Callback.consumeUnwrappedLine(*I);
246 }
247 Callback.finishRun();
248 Lines.clear();
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
253 }
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260
261 return StructuralError;
262 }
263
parseFile()264 void UnwrappedLineParser::parseFile() {
265 ScopedDeclarationState DeclarationState(
266 *Line, DeclarationScopeStack,
267 /*MustBeDeclaration=*/!Line->InPPDirective);
268 parseLevel(/*HasOpeningBrace=*/false);
269 // Make sure to format the remaining tokens.
270 flushComments(true);
271 addUnwrappedLine();
272 }
273
parseLevel(bool HasOpeningBrace)274 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
275 bool SwitchLabelEncountered = false;
276 do {
277 switch (FormatTok->Tok.getKind()) {
278 case tok::comment:
279 nextToken();
280 addUnwrappedLine();
281 break;
282 case tok::l_brace:
283 // FIXME: Add parameter whether this can happen - if this happens, we must
284 // be in a non-declaration context.
285 parseBlock(/*MustBeDeclaration=*/false);
286 addUnwrappedLine();
287 break;
288 case tok::r_brace:
289 if (HasOpeningBrace)
290 return;
291 StructuralError = true;
292 nextToken();
293 addUnwrappedLine();
294 break;
295 case tok::kw_default:
296 case tok::kw_case:
297 if (!SwitchLabelEncountered &&
298 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
299 ++Line->Level;
300 SwitchLabelEncountered = true;
301 parseStructuralElement();
302 break;
303 default:
304 parseStructuralElement();
305 break;
306 }
307 } while (!eof());
308 }
309
calculateBraceTypes()310 void UnwrappedLineParser::calculateBraceTypes() {
311 // We'll parse forward through the tokens until we hit
312 // a closing brace or eof - note that getNextToken() will
313 // parse macros, so this will magically work inside macro
314 // definitions, too.
315 unsigned StoredPosition = Tokens->getPosition();
316 FormatToken *Tok = FormatTok;
317 // Keep a stack of positions of lbrace tokens. We will
318 // update information about whether an lbrace starts a
319 // braced init list or a different block during the loop.
320 SmallVector<FormatToken *, 8> LBraceStack;
321 assert(Tok->Tok.is(tok::l_brace));
322 do {
323 // Get next none-comment token.
324 FormatToken *NextTok;
325 unsigned ReadTokens = 0;
326 do {
327 NextTok = Tokens->getNextToken();
328 ++ReadTokens;
329 } while (NextTok->is(tok::comment));
330
331 switch (Tok->Tok.getKind()) {
332 case tok::l_brace:
333 LBraceStack.push_back(Tok);
334 break;
335 case tok::r_brace:
336 if (!LBraceStack.empty()) {
337 if (LBraceStack.back()->BlockKind == BK_Unknown) {
338 bool ProbablyBracedList = false;
339 if (Style.Language == FormatStyle::LK_Proto) {
340 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
341 } else {
342 // Using OriginalColumn to distinguish between ObjC methods and
343 // binary operators is a bit hacky.
344 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
345 NextTok->OriginalColumn == 0;
346
347 // If there is a comma, semicolon or right paren after the closing
348 // brace, we assume this is a braced initializer list. Note that
349 // regardless how we mark inner braces here, we will overwrite the
350 // BlockKind later if we parse a braced list (where all blocks
351 // inside are by default braced lists), or when we explicitly detect
352 // blocks (for example while parsing lambdas).
353 //
354 // We exclude + and - as they can be ObjC visibility modifiers.
355 ProbablyBracedList =
356 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon,
357 tok::r_paren, tok::r_square, tok::l_brace,
358 tok::l_paren, tok::ellipsis) ||
359 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
360 }
361 if (ProbablyBracedList) {
362 Tok->BlockKind = BK_BracedInit;
363 LBraceStack.back()->BlockKind = BK_BracedInit;
364 } else {
365 Tok->BlockKind = BK_Block;
366 LBraceStack.back()->BlockKind = BK_Block;
367 }
368 }
369 LBraceStack.pop_back();
370 }
371 break;
372 case tok::at:
373 case tok::semi:
374 case tok::kw_if:
375 case tok::kw_while:
376 case tok::kw_for:
377 case tok::kw_switch:
378 case tok::kw_try:
379 case tok::kw___try:
380 if (!LBraceStack.empty())
381 LBraceStack.back()->BlockKind = BK_Block;
382 break;
383 default:
384 break;
385 }
386 Tok = NextTok;
387 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
388 // Assume other blocks for all unclosed opening braces.
389 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
390 if (LBraceStack[i]->BlockKind == BK_Unknown)
391 LBraceStack[i]->BlockKind = BK_Block;
392 }
393
394 FormatTok = Tokens->setPosition(StoredPosition);
395 }
396
parseBlock(bool MustBeDeclaration,bool AddLevel,bool MunchSemi)397 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
398 bool MunchSemi) {
399 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
400 unsigned InitialLevel = Line->Level;
401 nextToken();
402
403 addUnwrappedLine();
404
405 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
406 MustBeDeclaration);
407 if (AddLevel)
408 ++Line->Level;
409 parseLevel(/*HasOpeningBrace=*/true);
410
411 if (!FormatTok->Tok.is(tok::r_brace)) {
412 Line->Level = InitialLevel;
413 StructuralError = true;
414 return;
415 }
416
417 nextToken(); // Munch the closing brace.
418 if (MunchSemi && FormatTok->Tok.is(tok::semi))
419 nextToken();
420 Line->Level = InitialLevel;
421 }
422
isGoogScope(const UnwrappedLine & Line)423 static bool isGoogScope(const UnwrappedLine &Line) {
424 // FIXME: Closure-library specific stuff should not be hard-coded but be
425 // configurable.
426 if (Line.Tokens.size() < 4)
427 return false;
428 auto I = Line.Tokens.begin();
429 if (I->Tok->TokenText != "goog")
430 return false;
431 ++I;
432 if (I->Tok->isNot(tok::period))
433 return false;
434 ++I;
435 if (I->Tok->TokenText != "scope")
436 return false;
437 ++I;
438 return I->Tok->is(tok::l_paren);
439 }
440
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)441 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
442 const FormatToken &InitialToken) {
443 switch (Style.BreakBeforeBraces) {
444 case FormatStyle::BS_Linux:
445 return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
446 case FormatStyle::BS_Allman:
447 case FormatStyle::BS_GNU:
448 return true;
449 default:
450 return false;
451 }
452 }
453
parseChildBlock()454 void UnwrappedLineParser::parseChildBlock() {
455 FormatTok->BlockKind = BK_Block;
456 nextToken();
457 {
458 bool GoogScope =
459 Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
460 ScopedLineState LineState(*this);
461 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
462 /*MustBeDeclaration=*/false);
463 Line->Level += GoogScope ? 0 : 1;
464 parseLevel(/*HasOpeningBrace=*/true);
465 flushComments(isOnNewLine(*FormatTok));
466 Line->Level -= GoogScope ? 0 : 1;
467 }
468 nextToken();
469 }
470
parsePPDirective()471 void UnwrappedLineParser::parsePPDirective() {
472 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
473 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
474 nextToken();
475
476 if (!FormatTok->Tok.getIdentifierInfo()) {
477 parsePPUnknown();
478 return;
479 }
480
481 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
482 case tok::pp_define:
483 parsePPDefine();
484 return;
485 case tok::pp_if:
486 parsePPIf(/*IfDef=*/false);
487 break;
488 case tok::pp_ifdef:
489 case tok::pp_ifndef:
490 parsePPIf(/*IfDef=*/true);
491 break;
492 case tok::pp_else:
493 parsePPElse();
494 break;
495 case tok::pp_elif:
496 parsePPElIf();
497 break;
498 case tok::pp_endif:
499 parsePPEndIf();
500 break;
501 default:
502 parsePPUnknown();
503 break;
504 }
505 }
506
conditionalCompilationCondition(bool Unreachable)507 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
508 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
509 PPStack.push_back(PP_Unreachable);
510 else
511 PPStack.push_back(PP_Conditional);
512 }
513
conditionalCompilationStart(bool Unreachable)514 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
515 ++PPBranchLevel;
516 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
517 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
518 PPLevelBranchIndex.push_back(0);
519 PPLevelBranchCount.push_back(0);
520 }
521 PPChainBranchIndex.push(0);
522 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
523 conditionalCompilationCondition(Unreachable || Skip);
524 }
525
conditionalCompilationAlternative()526 void UnwrappedLineParser::conditionalCompilationAlternative() {
527 if (!PPStack.empty())
528 PPStack.pop_back();
529 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
530 if (!PPChainBranchIndex.empty())
531 ++PPChainBranchIndex.top();
532 conditionalCompilationCondition(
533 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
534 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
535 }
536
conditionalCompilationEnd()537 void UnwrappedLineParser::conditionalCompilationEnd() {
538 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
539 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
540 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
541 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
542 }
543 }
544 // Guard against #endif's without #if.
545 if (PPBranchLevel > 0)
546 --PPBranchLevel;
547 if (!PPChainBranchIndex.empty())
548 PPChainBranchIndex.pop();
549 if (!PPStack.empty())
550 PPStack.pop_back();
551 }
552
parsePPIf(bool IfDef)553 void UnwrappedLineParser::parsePPIf(bool IfDef) {
554 nextToken();
555 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
556 FormatTok->Tok.getLiteralData() != nullptr &&
557 StringRef(FormatTok->Tok.getLiteralData(),
558 FormatTok->Tok.getLength()) == "0") ||
559 FormatTok->Tok.is(tok::kw_false);
560 conditionalCompilationStart(!IfDef && IsLiteralFalse);
561 parsePPUnknown();
562 }
563
parsePPElse()564 void UnwrappedLineParser::parsePPElse() {
565 conditionalCompilationAlternative();
566 parsePPUnknown();
567 }
568
parsePPElIf()569 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
570
parsePPEndIf()571 void UnwrappedLineParser::parsePPEndIf() {
572 conditionalCompilationEnd();
573 parsePPUnknown();
574 }
575
parsePPDefine()576 void UnwrappedLineParser::parsePPDefine() {
577 nextToken();
578
579 if (FormatTok->Tok.getKind() != tok::identifier) {
580 parsePPUnknown();
581 return;
582 }
583 nextToken();
584 if (FormatTok->Tok.getKind() == tok::l_paren &&
585 FormatTok->WhitespaceRange.getBegin() ==
586 FormatTok->WhitespaceRange.getEnd()) {
587 parseParens();
588 }
589 addUnwrappedLine();
590 Line->Level = 1;
591
592 // Errors during a preprocessor directive can only affect the layout of the
593 // preprocessor directive, and thus we ignore them. An alternative approach
594 // would be to use the same approach we use on the file level (no
595 // re-indentation if there was a structural error) within the macro
596 // definition.
597 parseFile();
598 }
599
parsePPUnknown()600 void UnwrappedLineParser::parsePPUnknown() {
601 do {
602 nextToken();
603 } while (!eof());
604 addUnwrappedLine();
605 }
606
607 // Here we blacklist certain tokens that are not usually the first token in an
608 // unwrapped line. This is used in attempt to distinguish macro calls without
609 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const clang::Token & Tok)610 static bool tokenCanStartNewLine(const clang::Token &Tok) {
611 // Semicolon can be a null-statement, l_square can be a start of a macro or
612 // a C++11 attribute, but this doesn't seem to be common.
613 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
614 Tok.isNot(tok::l_square) &&
615 // Tokens that can only be used as binary operators and a part of
616 // overloaded operator names.
617 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
618 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
619 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
620 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
621 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
622 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
623 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
624 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
625 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
626 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
627 Tok.isNot(tok::lesslessequal) &&
628 // Colon is used in labels, base class lists, initializer lists,
629 // range-based for loops, ternary operator, but should never be the
630 // first token in an unwrapped line.
631 Tok.isNot(tok::colon) &&
632 // 'noexcept' is a trailing annotation.
633 Tok.isNot(tok::kw_noexcept);
634 }
635
parseStructuralElement()636 void UnwrappedLineParser::parseStructuralElement() {
637 assert(!FormatTok->Tok.is(tok::l_brace));
638 switch (FormatTok->Tok.getKind()) {
639 case tok::at:
640 nextToken();
641 if (FormatTok->Tok.is(tok::l_brace)) {
642 parseBracedList();
643 break;
644 }
645 switch (FormatTok->Tok.getObjCKeywordID()) {
646 case tok::objc_public:
647 case tok::objc_protected:
648 case tok::objc_package:
649 case tok::objc_private:
650 return parseAccessSpecifier();
651 case tok::objc_interface:
652 case tok::objc_implementation:
653 return parseObjCInterfaceOrImplementation();
654 case tok::objc_protocol:
655 return parseObjCProtocol();
656 case tok::objc_end:
657 return; // Handled by the caller.
658 case tok::objc_optional:
659 case tok::objc_required:
660 nextToken();
661 addUnwrappedLine();
662 return;
663 case tok::objc_try:
664 // This branch isn't strictly necessary (the kw_try case below would
665 // do this too after the tok::at is parsed above). But be explicit.
666 parseTryCatch();
667 return;
668 default:
669 break;
670 }
671 break;
672 case tok::kw_asm:
673 nextToken();
674 if (FormatTok->is(tok::l_brace)) {
675 nextToken();
676 while (FormatTok && FormatTok->isNot(tok::eof)) {
677 if (FormatTok->is(tok::r_brace)) {
678 nextToken();
679 break;
680 }
681 FormatTok->Finalized = true;
682 nextToken();
683 }
684 }
685 break;
686 case tok::kw_namespace:
687 parseNamespace();
688 return;
689 case tok::kw_inline:
690 nextToken();
691 if (FormatTok->Tok.is(tok::kw_namespace)) {
692 parseNamespace();
693 return;
694 }
695 break;
696 case tok::kw_public:
697 case tok::kw_protected:
698 case tok::kw_private:
699 if (Style.Language == FormatStyle::LK_Java ||
700 Style.Language == FormatStyle::LK_JavaScript)
701 nextToken();
702 else
703 parseAccessSpecifier();
704 return;
705 case tok::kw_if:
706 parseIfThenElse();
707 return;
708 case tok::kw_for:
709 case tok::kw_while:
710 parseForOrWhileLoop();
711 return;
712 case tok::kw_do:
713 parseDoWhile();
714 return;
715 case tok::kw_switch:
716 parseSwitch();
717 return;
718 case tok::kw_default:
719 nextToken();
720 parseLabel();
721 return;
722 case tok::kw_case:
723 parseCaseLabel();
724 return;
725 case tok::kw_try:
726 case tok::kw___try:
727 parseTryCatch();
728 return;
729 case tok::kw_extern:
730 nextToken();
731 if (FormatTok->Tok.is(tok::string_literal)) {
732 nextToken();
733 if (FormatTok->Tok.is(tok::l_brace)) {
734 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
735 addUnwrappedLine();
736 return;
737 }
738 }
739 break;
740 case tok::kw_export:
741 if (Style.Language == FormatStyle::LK_JavaScript) {
742 parseJavaScriptEs6ImportExport();
743 return;
744 }
745 break;
746 case tok::identifier:
747 if (FormatTok->IsForEachMacro) {
748 parseForOrWhileLoop();
749 return;
750 }
751 if (Style.Language == FormatStyle::LK_JavaScript &&
752 FormatTok->is(Keywords.kw_import)) {
753 parseJavaScriptEs6ImportExport();
754 return;
755 }
756 if (FormatTok->is(Keywords.kw_signals)) {
757 parseAccessSpecifier();
758 return;
759 }
760 // In all other cases, parse the declaration.
761 break;
762 default:
763 break;
764 }
765 do {
766 switch (FormatTok->Tok.getKind()) {
767 case tok::at:
768 nextToken();
769 if (FormatTok->Tok.is(tok::l_brace))
770 parseBracedList();
771 break;
772 case tok::kw_enum:
773 parseEnum();
774 break;
775 case tok::kw_typedef:
776 nextToken();
777 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
778 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
779 parseEnum();
780 break;
781 case tok::kw_struct:
782 case tok::kw_union:
783 case tok::kw_class:
784 parseRecord();
785 // A record declaration or definition is always the start of a structural
786 // element.
787 break;
788 case tok::period:
789 nextToken();
790 // In Java, classes have an implicit static member "class".
791 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
792 FormatTok->is(tok::kw_class))
793 nextToken();
794 break;
795 case tok::semi:
796 nextToken();
797 addUnwrappedLine();
798 return;
799 case tok::r_brace:
800 addUnwrappedLine();
801 return;
802 case tok::l_paren:
803 parseParens();
804 break;
805 case tok::caret:
806 nextToken();
807 if (FormatTok->Tok.isAnyIdentifier() ||
808 FormatTok->isSimpleTypeSpecifier())
809 nextToken();
810 if (FormatTok->is(tok::l_paren))
811 parseParens();
812 if (FormatTok->is(tok::l_brace))
813 parseChildBlock();
814 break;
815 case tok::l_brace:
816 if (!tryToParseBracedList()) {
817 // A block outside of parentheses must be the last part of a
818 // structural element.
819 // FIXME: Figure out cases where this is not true, and add projections
820 // for them (the one we know is missing are lambdas).
821 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
822 addUnwrappedLine();
823 FormatTok->Type = TT_FunctionLBrace;
824 parseBlock(/*MustBeDeclaration=*/false);
825 addUnwrappedLine();
826 return;
827 }
828 // Otherwise this was a braced init list, and the structural
829 // element continues.
830 break;
831 case tok::kw_try:
832 // We arrive here when parsing function-try blocks.
833 parseTryCatch();
834 return;
835 case tok::identifier: {
836 StringRef Text = FormatTok->TokenText;
837 // Parse function literal unless 'function' is the first token in a line
838 // in which case this should be treated as a free-standing function.
839 if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" &&
840 Line->Tokens.size() > 0) {
841 tryToParseJSFunction();
842 break;
843 }
844 nextToken();
845 if (Line->Tokens.size() == 1 &&
846 // JS doesn't have macros, and within classes colons indicate fields,
847 // not labels.
848 Style.Language != FormatStyle::LK_JavaScript) {
849 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
850 parseLabel();
851 return;
852 }
853 // Recognize function-like macro usages without trailing semicolon as
854 // well as free-standing macros like Q_OBJECT.
855 bool FunctionLike = FormatTok->is(tok::l_paren);
856 if (FunctionLike)
857 parseParens();
858 if (FormatTok->NewlinesBefore > 0 &&
859 (Text.size() >= 5 || FunctionLike) &&
860 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
861 addUnwrappedLine();
862 return;
863 }
864 }
865 break;
866 }
867 case tok::equal:
868 nextToken();
869 if (FormatTok->Tok.is(tok::l_brace)) {
870 parseBracedList();
871 }
872 break;
873 case tok::l_square:
874 parseSquare();
875 break;
876 case tok::kw_new:
877 parseNew();
878 break;
879 default:
880 nextToken();
881 break;
882 }
883 } while (!eof());
884 }
885
tryToParseLambda()886 bool UnwrappedLineParser::tryToParseLambda() {
887 // FIXME: This is a dirty way to access the previous token. Find a better
888 // solution.
889 if (!Line->Tokens.empty() &&
890 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
891 tok::kw_new, tok::kw_delete) ||
892 Line->Tokens.back().Tok->closesScope() ||
893 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
894 nextToken();
895 return false;
896 }
897 assert(FormatTok->is(tok::l_square));
898 FormatToken &LSquare = *FormatTok;
899 if (!tryToParseLambdaIntroducer())
900 return false;
901
902 while (FormatTok->isNot(tok::l_brace)) {
903 if (FormatTok->isSimpleTypeSpecifier()) {
904 nextToken();
905 continue;
906 }
907 switch (FormatTok->Tok.getKind()) {
908 case tok::l_brace:
909 break;
910 case tok::l_paren:
911 parseParens();
912 break;
913 case tok::amp:
914 case tok::star:
915 case tok::kw_const:
916 case tok::comma:
917 case tok::less:
918 case tok::greater:
919 case tok::identifier:
920 case tok::coloncolon:
921 case tok::kw_mutable:
922 nextToken();
923 break;
924 case tok::arrow:
925 FormatTok->Type = TT_TrailingReturnArrow;
926 nextToken();
927 break;
928 default:
929 return true;
930 }
931 }
932 LSquare.Type = TT_LambdaLSquare;
933 parseChildBlock();
934 return true;
935 }
936
tryToParseLambdaIntroducer()937 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
938 nextToken();
939 if (FormatTok->is(tok::equal)) {
940 nextToken();
941 if (FormatTok->is(tok::r_square)) {
942 nextToken();
943 return true;
944 }
945 if (FormatTok->isNot(tok::comma))
946 return false;
947 nextToken();
948 } else if (FormatTok->is(tok::amp)) {
949 nextToken();
950 if (FormatTok->is(tok::r_square)) {
951 nextToken();
952 return true;
953 }
954 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
955 return false;
956 }
957 if (FormatTok->is(tok::comma))
958 nextToken();
959 } else if (FormatTok->is(tok::r_square)) {
960 nextToken();
961 return true;
962 }
963 do {
964 if (FormatTok->is(tok::amp))
965 nextToken();
966 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
967 return false;
968 nextToken();
969 if (FormatTok->is(tok::ellipsis))
970 nextToken();
971 if (FormatTok->is(tok::comma)) {
972 nextToken();
973 } else if (FormatTok->is(tok::r_square)) {
974 nextToken();
975 return true;
976 } else {
977 return false;
978 }
979 } while (!eof());
980 return false;
981 }
982
tryToParseJSFunction()983 void UnwrappedLineParser::tryToParseJSFunction() {
984 nextToken();
985
986 // Consume function name.
987 if (FormatTok->is(tok::identifier))
988 nextToken();
989
990 if (FormatTok->isNot(tok::l_paren))
991 return;
992 nextToken();
993 while (FormatTok->isNot(tok::l_brace)) {
994 // Err on the side of caution in order to avoid consuming the full file in
995 // case of incomplete code.
996 if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
997 tok::comment))
998 return;
999 nextToken();
1000 }
1001 parseChildBlock();
1002 }
1003
tryToParseBracedList()1004 bool UnwrappedLineParser::tryToParseBracedList() {
1005 if (FormatTok->BlockKind == BK_Unknown)
1006 calculateBraceTypes();
1007 assert(FormatTok->BlockKind != BK_Unknown);
1008 if (FormatTok->BlockKind == BK_Block)
1009 return false;
1010 parseBracedList();
1011 return true;
1012 }
1013
parseBracedList(bool ContinueOnSemicolons)1014 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1015 bool HasError = false;
1016 nextToken();
1017
1018 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1019 // replace this by using parseAssigmentExpression() inside.
1020 do {
1021 if (Style.Language == FormatStyle::LK_JavaScript &&
1022 FormatTok->is(Keywords.kw_function)) {
1023 tryToParseJSFunction();
1024 continue;
1025 }
1026 switch (FormatTok->Tok.getKind()) {
1027 case tok::caret:
1028 nextToken();
1029 if (FormatTok->is(tok::l_brace)) {
1030 parseChildBlock();
1031 }
1032 break;
1033 case tok::l_square:
1034 tryToParseLambda();
1035 break;
1036 case tok::l_brace:
1037 // Assume there are no blocks inside a braced init list apart
1038 // from the ones we explicitly parse out (like lambdas).
1039 FormatTok->BlockKind = BK_BracedInit;
1040 parseBracedList();
1041 break;
1042 case tok::r_paren:
1043 // JavaScript can just have free standing methods and getters/setters in
1044 // object literals. Detect them by a "{" following ")".
1045 if (Style.Language == FormatStyle::LK_JavaScript) {
1046 nextToken();
1047 if (FormatTok->is(tok::l_brace))
1048 parseChildBlock();
1049 break;
1050 }
1051 nextToken();
1052 break;
1053 case tok::r_brace:
1054 nextToken();
1055 return !HasError;
1056 case tok::semi:
1057 HasError = true;
1058 if (!ContinueOnSemicolons)
1059 return !HasError;
1060 nextToken();
1061 break;
1062 case tok::comma:
1063 nextToken();
1064 break;
1065 default:
1066 nextToken();
1067 break;
1068 }
1069 } while (!eof());
1070 return false;
1071 }
1072
parseParens()1073 void UnwrappedLineParser::parseParens() {
1074 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1075 nextToken();
1076 do {
1077 switch (FormatTok->Tok.getKind()) {
1078 case tok::l_paren:
1079 parseParens();
1080 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1081 parseChildBlock();
1082 break;
1083 case tok::r_paren:
1084 nextToken();
1085 return;
1086 case tok::r_brace:
1087 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1088 return;
1089 case tok::l_square:
1090 tryToParseLambda();
1091 break;
1092 case tok::l_brace:
1093 if (!tryToParseBracedList()) {
1094 parseChildBlock();
1095 }
1096 break;
1097 case tok::at:
1098 nextToken();
1099 if (FormatTok->Tok.is(tok::l_brace))
1100 parseBracedList();
1101 break;
1102 case tok::identifier:
1103 if (Style.Language == FormatStyle::LK_JavaScript &&
1104 FormatTok->is(Keywords.kw_function))
1105 tryToParseJSFunction();
1106 else
1107 nextToken();
1108 break;
1109 default:
1110 nextToken();
1111 break;
1112 }
1113 } while (!eof());
1114 }
1115
parseSquare()1116 void UnwrappedLineParser::parseSquare() {
1117 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1118 if (tryToParseLambda())
1119 return;
1120 do {
1121 switch (FormatTok->Tok.getKind()) {
1122 case tok::l_paren:
1123 parseParens();
1124 break;
1125 case tok::r_square:
1126 nextToken();
1127 return;
1128 case tok::r_brace:
1129 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1130 return;
1131 case tok::l_square:
1132 parseSquare();
1133 break;
1134 case tok::l_brace: {
1135 if (!tryToParseBracedList()) {
1136 parseChildBlock();
1137 }
1138 break;
1139 }
1140 case tok::at:
1141 nextToken();
1142 if (FormatTok->Tok.is(tok::l_brace))
1143 parseBracedList();
1144 break;
1145 default:
1146 nextToken();
1147 break;
1148 }
1149 } while (!eof());
1150 }
1151
parseIfThenElse()1152 void UnwrappedLineParser::parseIfThenElse() {
1153 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1154 nextToken();
1155 if (FormatTok->Tok.is(tok::l_paren))
1156 parseParens();
1157 bool NeedsUnwrappedLine = false;
1158 if (FormatTok->Tok.is(tok::l_brace)) {
1159 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1160 parseBlock(/*MustBeDeclaration=*/false);
1161 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1162 Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1163 addUnwrappedLine();
1164 } else {
1165 NeedsUnwrappedLine = true;
1166 }
1167 } else {
1168 addUnwrappedLine();
1169 ++Line->Level;
1170 parseStructuralElement();
1171 --Line->Level;
1172 }
1173 if (FormatTok->Tok.is(tok::kw_else)) {
1174 if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1175 addUnwrappedLine();
1176 nextToken();
1177 if (FormatTok->Tok.is(tok::l_brace)) {
1178 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1179 parseBlock(/*MustBeDeclaration=*/false);
1180 addUnwrappedLine();
1181 } else if (FormatTok->Tok.is(tok::kw_if)) {
1182 parseIfThenElse();
1183 } else {
1184 addUnwrappedLine();
1185 ++Line->Level;
1186 parseStructuralElement();
1187 --Line->Level;
1188 }
1189 } else if (NeedsUnwrappedLine) {
1190 addUnwrappedLine();
1191 }
1192 }
1193
/// Parses a try statement together with all handler clauses following it;
/// the current token must be 'try' or '__try'.
///
/// Handles, in this order: an optional function-try-block initializer list
/// (after ':'), an optional Java try-with-resources parenthesis, the try body
/// and then any number of handler clauses. Sets StructuralError when an
/// initializer lacks its parentheses or when the try is not followed by a
/// block or a 'catch'.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // True while the closing brace of the most recently parsed block still sits
  // in an unfinished line (so that a following handler keyword can join it).
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      // Every initializer is expected to be "name(...)"; anything else is
      // flagged as a structural error but parsing continues.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        StructuralError = true;
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    // Indenter stays alive until the end of this branch, i.e. it also covers
    // the addUnwrappedLine() below.
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // For these brace styles the line ends right after the block; otherwise
    // it is kept open for a following handler keyword.
    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
        Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
        Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    StructuralError = true;
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler clause.
  while (1) {
    // Step over an '@' (as in Objective-C "@catch"/"@finally"). Note that it
    // is consumed even if no handler keyword follows.
    if (FormatTok->is(tok::at))
      nextToken();
    // Leave the loop unless the current token starts a handler: 'catch',
    // '__except', '__finally', 'finally' (Java and JavaScript only), or the
    // Objective-C keywords 'catch'/'finally'.
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip everything between the handler keyword and its "{", parsing
    // parenthesized parts as a unit.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      // No handler body in sight: give up. This returns directly, so the
      // trailing addUnwrappedLine() at the end of the function is not run.
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    // Indenter lives until the end of this loop iteration.
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
        Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
        Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  }
  // Finish the line holding the last closing brace if it is still open.
  if (NeedsUnwrappedLine) {
    addUnwrappedLine();
  }
}
1271
parseNamespace()1272 void UnwrappedLineParser::parseNamespace() {
1273 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1274
1275 const FormatToken &InitialToken = *FormatTok;
1276 nextToken();
1277 if (FormatTok->Tok.is(tok::identifier))
1278 nextToken();
1279 if (FormatTok->Tok.is(tok::l_brace)) {
1280 if (ShouldBreakBeforeBrace(Style, InitialToken))
1281 addUnwrappedLine();
1282
1283 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1284 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1285 DeclarationScopeStack.size() > 1);
1286 parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1287 // Munch the semicolon after a namespace. This is more common than one would
1288 // think. Puttin the semicolon into its own line is very ugly.
1289 if (FormatTok->Tok.is(tok::semi))
1290 nextToken();
1291 addUnwrappedLine();
1292 }
1293 // FIXME: Add error handling.
1294 }
1295
parseNew()1296 void UnwrappedLineParser::parseNew() {
1297 assert(FormatTok->is(tok::kw_new) && "'new' expected");
1298 nextToken();
1299 if (Style.Language != FormatStyle::LK_Java)
1300 return;
1301
1302 // In Java, we can parse everything up to the parens, which aren't optional.
1303 do {
1304 // There should not be a ;, { or } before the new's open paren.
1305 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1306 return;
1307
1308 // Consume the parens.
1309 if (FormatTok->is(tok::l_paren)) {
1310 parseParens();
1311
1312 // If there is a class body of an anonymous class, consume that as child.
1313 if (FormatTok->is(tok::l_brace))
1314 parseChildBlock();
1315 return;
1316 }
1317 nextToken();
1318 } while (!eof());
1319 }
1320
parseForOrWhileLoop()1321 void UnwrappedLineParser::parseForOrWhileLoop() {
1322 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) ||
1323 FormatTok->IsForEachMacro) &&
1324 "'for', 'while' or foreach macro expected");
1325 nextToken();
1326 if (FormatTok->Tok.is(tok::l_paren))
1327 parseParens();
1328 if (FormatTok->Tok.is(tok::l_brace)) {
1329 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1330 parseBlock(/*MustBeDeclaration=*/false);
1331 addUnwrappedLine();
1332 } else {
1333 addUnwrappedLine();
1334 ++Line->Level;
1335 parseStructuralElement();
1336 --Line->Level;
1337 }
1338 }
1339
parseDoWhile()1340 void UnwrappedLineParser::parseDoWhile() {
1341 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1342 nextToken();
1343 if (FormatTok->Tok.is(tok::l_brace)) {
1344 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1345 parseBlock(/*MustBeDeclaration=*/false);
1346 if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1347 addUnwrappedLine();
1348 } else {
1349 addUnwrappedLine();
1350 ++Line->Level;
1351 parseStructuralElement();
1352 --Line->Level;
1353 }
1354
1355 // FIXME: Add error handling.
1356 if (!FormatTok->Tok.is(tok::kw_while)) {
1357 addUnwrappedLine();
1358 return;
1359 }
1360
1361 nextToken();
1362 parseStructuralElement();
1363 }
1364
parseLabel()1365 void UnwrappedLineParser::parseLabel() {
1366 nextToken();
1367 unsigned OldLineLevel = Line->Level;
1368 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1369 --Line->Level;
1370 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1371 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1372 parseBlock(/*MustBeDeclaration=*/false);
1373 if (FormatTok->Tok.is(tok::kw_break)) {
1374 // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1375 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1376 Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1377 addUnwrappedLine();
1378 }
1379 parseStructuralElement();
1380 }
1381 addUnwrappedLine();
1382 } else {
1383 addUnwrappedLine();
1384 }
1385 Line->Level = OldLineLevel;
1386 }
1387
parseCaseLabel()1388 void UnwrappedLineParser::parseCaseLabel() {
1389 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1390 // FIXME: fix handling of complex expressions here.
1391 do {
1392 nextToken();
1393 } while (!eof() && !FormatTok->Tok.is(tok::colon));
1394 parseLabel();
1395 }
1396
parseSwitch()1397 void UnwrappedLineParser::parseSwitch() {
1398 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1399 nextToken();
1400 if (FormatTok->Tok.is(tok::l_paren))
1401 parseParens();
1402 if (FormatTok->Tok.is(tok::l_brace)) {
1403 CompoundStatementIndenter Indenter(this, Style, Line->Level);
1404 parseBlock(/*MustBeDeclaration=*/false);
1405 addUnwrappedLine();
1406 } else {
1407 addUnwrappedLine();
1408 ++Line->Level;
1409 parseStructuralElement();
1410 --Line->Level;
1411 }
1412 }
1413
parseAccessSpecifier()1414 void UnwrappedLineParser::parseAccessSpecifier() {
1415 nextToken();
1416 // Understand Qt's slots.
1417 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1418 nextToken();
1419 // Otherwise, we don't know what it is, and we'd better keep the next token.
1420 if (FormatTok->Tok.is(tok::colon))
1421 nextToken();
1422 addUnwrappedLine();
1423 }
1424
parseEnum()1425 void UnwrappedLineParser::parseEnum() {
1426 // Won't be 'enum' for NS_ENUMs.
1427 if (FormatTok->Tok.is(tok::kw_enum))
1428 nextToken();
1429
1430 // Eat up enum class ...
1431 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1432 nextToken();
1433 while (FormatTok->Tok.getIdentifierInfo() ||
1434 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1435 tok::greater, tok::comma, tok::question)) {
1436 nextToken();
1437 // We can have macros or attributes in between 'enum' and the enum name.
1438 if (FormatTok->is(tok::l_paren))
1439 parseParens();
1440 if (FormatTok->is(tok::identifier))
1441 nextToken();
1442 }
1443
1444 // Just a declaration or something is wrong.
1445 if (FormatTok->isNot(tok::l_brace))
1446 return;
1447 FormatTok->BlockKind = BK_Block;
1448
1449 if (Style.Language == FormatStyle::LK_Java) {
1450 // Java enums are different.
1451 parseJavaEnumBody();
1452 return;
1453 }
1454
1455 // Parse enum body.
1456 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1457 if (HasError) {
1458 if (FormatTok->is(tok::semi))
1459 nextToken();
1460 addUnwrappedLine();
1461 }
1462
1463 // We fall through to parsing a structural element afterwards, so that in
1464 // enum A {} n, m;
1465 // "} n, m;" will end up in one unwrapped line.
1466 }
1467
parseJavaEnumBody()1468 void UnwrappedLineParser::parseJavaEnumBody() {
1469 // Determine whether the enum is simple, i.e. does not have a semicolon or
1470 // constants with class bodies. Simple enums can be formatted like braced
1471 // lists, contracted to a single line, etc.
1472 unsigned StoredPosition = Tokens->getPosition();
1473 bool IsSimple = true;
1474 FormatToken *Tok = Tokens->getNextToken();
1475 while (Tok) {
1476 if (Tok->is(tok::r_brace))
1477 break;
1478 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1479 IsSimple = false;
1480 break;
1481 }
1482 // FIXME: This will also mark enums with braces in the arguments to enum
1483 // constants as "not simple". This is probably fine in practice, though.
1484 Tok = Tokens->getNextToken();
1485 }
1486 FormatTok = Tokens->setPosition(StoredPosition);
1487
1488 if (IsSimple) {
1489 parseBracedList();
1490 addUnwrappedLine();
1491 return;
1492 }
1493
1494 // Parse the body of a more complex enum.
1495 // First add a line for everything up to the "{".
1496 nextToken();
1497 addUnwrappedLine();
1498 ++Line->Level;
1499
1500 // Parse the enum constants.
1501 while (FormatTok) {
1502 if (FormatTok->is(tok::l_brace)) {
1503 // Parse the constant's class body.
1504 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1505 /*MunchSemi=*/false);
1506 } else if (FormatTok->is(tok::l_paren)) {
1507 parseParens();
1508 } else if (FormatTok->is(tok::comma)) {
1509 nextToken();
1510 addUnwrappedLine();
1511 } else if (FormatTok->is(tok::semi)) {
1512 nextToken();
1513 addUnwrappedLine();
1514 break;
1515 } else if (FormatTok->is(tok::r_brace)) {
1516 addUnwrappedLine();
1517 break;
1518 } else {
1519 nextToken();
1520 }
1521 }
1522
1523 // Parse the class body after the enum's ";" if any.
1524 parseLevel(/*HasOpeningBrace=*/true);
1525 nextToken();
1526 --Line->Level;
1527 addUnwrappedLine();
1528 }
1529
parseRecord()1530 void UnwrappedLineParser::parseRecord() {
1531 const FormatToken &InitialToken = *FormatTok;
1532 nextToken();
1533 if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute,
1534 tok::kw___declspec, tok::kw_alignas)) {
1535 nextToken();
1536 // We can have macros or attributes in between 'class' and the class name.
1537 if (FormatTok->Tok.is(tok::l_paren)) {
1538 parseParens();
1539 }
1540 // The actual identifier can be a nested name specifier, and in macros
1541 // it is often token-pasted.
1542 while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) ||
1543 FormatTok->is(tok::hashhash) ||
1544 ((Style.Language == FormatStyle::LK_Java ||
1545 Style.Language == FormatStyle::LK_JavaScript) &&
1546 FormatTok->isOneOf(tok::period, tok::comma)))
1547 nextToken();
1548
1549 // Note that parsing away template declarations here leads to incorrectly
1550 // accepting function declarations as record declarations.
1551 // In general, we cannot solve this problem. Consider:
1552 // class A<int> B() {}
1553 // which can be a function definition or a class definition when B() is a
1554 // macro. If we find enough real-world cases where this is a problem, we
1555 // can parse for the 'template' keyword in the beginning of the statement,
1556 // and thus rule out the record production in case there is no template
1557 // (this would still leave us with an ambiguity between template function
1558 // and class declarations).
1559 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1560 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1561 if (FormatTok->Tok.is(tok::semi))
1562 return;
1563 nextToken();
1564 }
1565 }
1566 }
1567 if (FormatTok->Tok.is(tok::l_brace)) {
1568 if (ShouldBreakBeforeBrace(Style, InitialToken))
1569 addUnwrappedLine();
1570
1571 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1572 /*MunchSemi=*/false);
1573 }
1574 // We fall through to parsing a structural element afterwards, so
1575 // class A {} n, m;
1576 // will end up in one unwrapped line.
1577 // This does not apply for Java.
1578 if (Style.Language == FormatStyle::LK_Java ||
1579 Style.Language == FormatStyle::LK_JavaScript)
1580 addUnwrappedLine();
1581 }
1582
parseObjCProtocolList()1583 void UnwrappedLineParser::parseObjCProtocolList() {
1584 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1585 do
1586 nextToken();
1587 while (!eof() && FormatTok->Tok.isNot(tok::greater));
1588 nextToken(); // Skip '>'.
1589 }
1590
parseObjCUntilAtEnd()1591 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1592 do {
1593 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1594 nextToken();
1595 addUnwrappedLine();
1596 break;
1597 }
1598 if (FormatTok->is(tok::l_brace)) {
1599 parseBlock(/*MustBeDeclaration=*/false);
1600 // In ObjC interfaces, nothing should be following the "}".
1601 addUnwrappedLine();
1602 } else if (FormatTok->is(tok::r_brace)) {
1603 // Ignore stray "}". parseStructuralElement doesn't consume them.
1604 nextToken();
1605 addUnwrappedLine();
1606 } else {
1607 parseStructuralElement();
1608 }
1609 } while (!eof());
1610 }
1611
parseObjCInterfaceOrImplementation()1612 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1613 nextToken();
1614 nextToken(); // interface name
1615
1616 // @interface can be followed by either a base class, or a category.
1617 if (FormatTok->Tok.is(tok::colon)) {
1618 nextToken();
1619 nextToken(); // base class name
1620 } else if (FormatTok->Tok.is(tok::l_paren))
1621 // Skip category, if present.
1622 parseParens();
1623
1624 if (FormatTok->Tok.is(tok::less))
1625 parseObjCProtocolList();
1626
1627 if (FormatTok->Tok.is(tok::l_brace)) {
1628 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1629 Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1630 addUnwrappedLine();
1631 parseBlock(/*MustBeDeclaration=*/true);
1632 }
1633
1634 // With instance variables, this puts '}' on its own line. Without instance
1635 // variables, this ends the @interface line.
1636 addUnwrappedLine();
1637
1638 parseObjCUntilAtEnd();
1639 }
1640
parseObjCProtocol()1641 void UnwrappedLineParser::parseObjCProtocol() {
1642 nextToken();
1643 nextToken(); // protocol name
1644
1645 if (FormatTok->Tok.is(tok::less))
1646 parseObjCProtocolList();
1647
1648 // Check for protocol declaration.
1649 if (FormatTok->Tok.is(tok::semi)) {
1650 nextToken();
1651 return addUnwrappedLine();
1652 }
1653
1654 addUnwrappedLine();
1655 parseObjCUntilAtEnd();
1656 }
1657
parseJavaScriptEs6ImportExport()1658 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1659 assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1660 nextToken();
1661
1662 if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_function,
1663 Keywords.kw_var))
1664 return; // Fall through to parsing the corresponding structure.
1665
1666 if (FormatTok->is(tok::kw_default)) {
1667 nextToken(); // export default ..., fall through after eating 'default'.
1668 return;
1669 }
1670
1671 if (FormatTok->is(tok::l_brace)) {
1672 FormatTok->BlockKind = BK_Block;
1673 parseBracedList();
1674 }
1675
1676 while (!eof() && FormatTok->isNot(tok::semi) &&
1677 FormatTok->isNot(tok::l_brace)) {
1678 nextToken();
1679 }
1680 }
1681
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")1682 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1683 StringRef Prefix = "") {
1684 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1685 << (Line.InPPDirective ? " MACRO" : "") << ": ";
1686 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1687 E = Line.Tokens.end();
1688 I != E; ++I) {
1689 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1690 }
1691 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1692 E = Line.Tokens.end();
1693 I != E; ++I) {
1694 const UnwrappedLineNode &Node = *I;
1695 for (SmallVectorImpl<UnwrappedLine>::const_iterator
1696 I = Node.Children.begin(),
1697 E = Node.Children.end();
1698 I != E; ++I) {
1699 printDebugInfo(*I, "\nChild: ");
1700 }
1701 }
1702 llvm::dbgs() << "\n";
1703 }
1704
addUnwrappedLine()1705 void UnwrappedLineParser::addUnwrappedLine() {
1706 if (Line->Tokens.empty())
1707 return;
1708 DEBUG({
1709 if (CurrentLines == &Lines)
1710 printDebugInfo(*Line);
1711 });
1712 CurrentLines->push_back(*Line);
1713 Line->Tokens.clear();
1714 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1715 for (SmallVectorImpl<UnwrappedLine>::iterator
1716 I = PreprocessorDirectives.begin(),
1717 E = PreprocessorDirectives.end();
1718 I != E; ++I) {
1719 CurrentLines->push_back(*I);
1720 }
1721 PreprocessorDirectives.clear();
1722 }
1723 }
1724
eof() const1725 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1726
isOnNewLine(const FormatToken & FormatTok)1727 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1728 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1729 FormatTok.NewlinesBefore > 0;
1730 }
1731
flushComments(bool NewlineBeforeNext)1732 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1733 bool JustComments = Line->Tokens.empty();
1734 for (SmallVectorImpl<FormatToken *>::const_iterator
1735 I = CommentsBeforeNextToken.begin(),
1736 E = CommentsBeforeNextToken.end();
1737 I != E; ++I) {
1738 if (isOnNewLine(**I) && JustComments) {
1739 addUnwrappedLine();
1740 }
1741 pushToken(*I);
1742 }
1743 if (NewlineBeforeNext && JustComments) {
1744 addUnwrappedLine();
1745 }
1746 CommentsBeforeNextToken.clear();
1747 }
1748
nextToken()1749 void UnwrappedLineParser::nextToken() {
1750 if (eof())
1751 return;
1752 flushComments(isOnNewLine(*FormatTok));
1753 pushToken(FormatTok);
1754 readToken();
1755 }
1756
/// Reads tokens from the token source until FormatTok holds the next token
/// that is not a comment (or the end of input is reached).
///
/// On the way, preprocessor directives are parsed, conflict marker tokens are
/// translated into conditional-compilation events, tokens inside unreachable
/// preprocessor branches are skipped, and comments are either appended to the
/// current line or queued in CommentsBeforeNextToken.
void UnwrappedLineParser::readToken() {
  // True as long as every comment seen so far continues the current line.
  // Once a comment starts a new line (or is the first token), this and all
  // following comments are queued in CommentsBeforeNextToken instead.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that follows an unescaped newline or is the first token starts a
    // preprocessor directive, unless we are already inside one. This is a
    // loop so that a '#' that is current again after parsePPDirective() is
    // handled as well.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState is defined outside this chunk; it
      // presumably saves the current line state and restores it when
      // BlockState goes out of scope at the end of each iteration - confirm.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are handled like conditional compilation: a
    // start marker opens a (reachable) branch, an alternative marker switches
    // to the next branch and an end marker closes it. The marker token itself
    // is dropped, and the token after it must start a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip tokens inside an unreachable preprocessor branch. The 'continue'
    // jumps to the loop condition, so this still stops at the end of input.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any token that is not a comment is the result.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      // The comment continues the current line: append it right away.
      pushToken(FormatTok);
    } else {
      // Keep the comment back until it is flushed (see flushComments()).
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1805
pushToken(FormatToken * Tok)1806 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1807 Line->Tokens.push_back(UnwrappedLineNode(Tok));
1808 if (MustBreakBeforeNextToken) {
1809 Line->Tokens.back().Tok->MustBreakBefore = true;
1810 MustBreakBeforeNextToken = false;
1811 }
1812 }
1813
1814 } // end namespace format
1815 } // end namespace clang
1816