1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
/// Classification of a line. \c AnnotatedLine keeps one of these values in its
/// \c Type member.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  // An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  // An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
};
36 
37 class AnnotatedLine {
38 public:
AnnotatedLine(const UnwrappedLine & Line)39   AnnotatedLine(const UnwrappedLine &Line)
40       : First(Line.Tokens.front().Tok), Level(Line.Level),
41         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
42         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
43         InPPDirective(Line.InPPDirective),
44         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
45         IsMultiVariableDeclStmt(false), Affected(false),
46         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
47         FirstStartColumn(Line.FirstStartColumn) {
48     assert(!Line.Tokens.empty());
49 
50     // Calculate Next and Previous for all tokens. Note that we must overwrite
51     // Next and Previous for every token, as previous formatting runs might have
52     // left them in a different state.
53     First->Previous = nullptr;
54     FormatToken *Current = First;
55     for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
56                                                       E = Line.Tokens.end();
57          I != E; ++I) {
58       const UnwrappedLineNode &Node = *I;
59       Current->Next = I->Tok;
60       I->Tok->Previous = Current;
61       Current = Current->Next;
62       Current->Children.clear();
63       for (const auto &Child : Node.Children) {
64         Children.push_back(new AnnotatedLine(Child));
65         Current->Children.push_back(Children.back());
66       }
67     }
68     Last = Current;
69     Last->Next = nullptr;
70   }
71 
~AnnotatedLine()72   ~AnnotatedLine() {
73     for (unsigned i = 0, e = Children.size(); i != e; ++i) {
74       delete Children[i];
75     }
76     FormatToken *Current = First;
77     while (Current) {
78       Current->Children.clear();
79       Current->Role.reset();
80       Current = Current->Next;
81     }
82   }
83 
84   /// \c true if this line starts with the given tokens in order, ignoring
85   /// comments.
startsWith(Ts...Tokens)86   template <typename... Ts> bool startsWith(Ts... Tokens) const {
87     return First && First->startsSequence(Tokens...);
88   }
89 
90   /// \c true if this line ends with the given tokens in reversed order,
91   /// ignoring comments.
92   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
93   /// this line is like "... T3 T2 T1".
endsWith(Ts...Tokens)94   template <typename... Ts> bool endsWith(Ts... Tokens) const {
95     return Last && Last->endsSequence(Tokens...);
96   }
97 
98   /// \c true if this line looks like a function definition instead of a
99   /// function declaration. Asserts MightBeFunctionDecl.
mightBeFunctionDefinition()100   bool mightBeFunctionDefinition() const {
101     assert(MightBeFunctionDecl);
102     // Try to determine if the end of a stream of tokens is either the
103     // Definition or the Declaration for a function. It does this by looking for
104     // the ';' in foo(); and using that it ends with a ; to know this is the
105     // Definition, however the line could end with
106     //    foo(); /* comment */
107     // or
108     //    foo(); // comment
109     // or
110     //    foo() // comment
111     // endsWith() ignores the comment.
112     return !endsWith(tok::semi);
113   }
114 
115   /// \c true if this line starts a namespace definition.
startsWithNamespace()116   bool startsWithNamespace() const {
117     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
118            startsWith(tok::kw_inline, tok::kw_namespace) ||
119            startsWith(tok::kw_export, tok::kw_namespace);
120   }
121 
122   FormatToken *First;
123   FormatToken *Last;
124 
125   SmallVector<AnnotatedLine *, 0> Children;
126 
127   LineType Type;
128   unsigned Level;
129   size_t MatchingOpeningBlockLineIndex;
130   size_t MatchingClosingBlockLineIndex;
131   bool InPPDirective;
132   bool MustBeDeclaration;
133   bool MightBeFunctionDecl;
134   bool IsMultiVariableDeclStmt;
135 
136   /// \c True if this line should be formatted, i.e. intersects directly or
137   /// indirectly with one of the input ranges.
138   bool Affected;
139 
140   /// \c True if the leading empty lines of this line intersect with one of the
141   /// input ranges.
142   bool LeadingEmptyLinesAffected;
143 
144   /// \c True if one of this line's children intersects with an input range.
145   bool ChildrenAffected;
146 
147   unsigned FirstStartColumn;
148 
149 private:
150   // Disallow copying.
151   AnnotatedLine(const AnnotatedLine &) = delete;
152   void operator=(const AnnotatedLine &) = delete;
153 };
154 
155 /// Determines extra information about the tokens comprising an
156 /// \c UnwrappedLine.
157 class TokenAnnotator {
158 public:
TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)159   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
160       : Style(Style), Keywords(Keywords) {}
161 
162   /// Adapts the indent levels of comment lines to the indent of the
163   /// subsequent line.
164   // FIXME: Can/should this be done in the UnwrappedLineParser?
165   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
166 
167   void annotate(AnnotatedLine &Line);
168   void calculateFormattingInformation(AnnotatedLine &Line);
169 
170 private:
171   /// Calculate the penalty for splitting before \c Tok.
172   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
173                         bool InFunctionDecl);
174 
175   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
176 
177   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
178                             const FormatToken &Right);
179 
180   bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);
181 
182   bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
183 
184   bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
185 
186   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
187 
188   void printDebugInfo(const AnnotatedLine &Line);
189 
190   void calculateUnbreakableTailLengths(AnnotatedLine &Line);
191 
192   const FormatStyle &Style;
193 
194   const AdditionalKeywords &Keywords;
195 };
196 
197 } // end namespace format
198 } // end namespace clang
199 
200 #endif
201