1 //===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Implements a combinartorial exploration of all the different 12 /// linebreaks unwrapped lines can be formatted in. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H 17 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H 18 19 #include "ContinuationIndenter.h" 20 #include "clang/Format/Format.h" 21 #include <map> 22 #include <queue> 23 #include <string> 24 25 namespace clang { 26 namespace format { 27 28 class ContinuationIndenter; 29 class WhitespaceManager; 30 31 class UnwrappedLineFormatter { 32 public: UnwrappedLineFormatter(ContinuationIndenter * Indenter,WhitespaceManager * Whitespaces,const FormatStyle & Style,const AdditionalKeywords & Keywords)33 UnwrappedLineFormatter(ContinuationIndenter *Indenter, 34 WhitespaceManager *Whitespaces, 35 const FormatStyle &Style, 36 const AdditionalKeywords &Keywords) 37 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), 38 Keywords(Keywords) {} 39 40 unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, 41 int AdditionalIndent = 0, bool FixBadIndentation = false); 42 43 private: 44 /// \brief Formats an \c AnnotatedLine and returns the penalty. 45 /// 46 /// If \p DryRun is \c false, directly applies the changes. 47 unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, 48 bool DryRun); 49 50 /// \brief An edge in the solution space from \c Previous->State to \c State, 51 /// inserting a newline dependent on the \c NewLine. 52 struct StateNode { StateNodeStateNode53 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 54 : State(State), NewLine(NewLine), Previous(Previous) {} 55 LineState State; 56 bool NewLine; 57 StateNode *Previous; 58 }; 59 60 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 61 /// 62 /// In case of equal penalties, we want to prefer states that were inserted 63 /// first. During state generation we make sure that we insert states first 64 /// that break the line as late as possible. 65 typedef std::pair<unsigned, unsigned> OrderedPenalty; 66 67 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 68 /// \c State has the given \c OrderedPenalty. 69 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 70 71 /// \brief The BFS queue type. 72 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 73 std::greater<QueueItem> > QueueType; 74 75 /// \brief Get the offset of the line relatively to the level. 76 /// 77 /// For example, 'public:' labels in classes are offset by 1 or 2 78 /// characters to the left from their level. getIndentOffset(const FormatToken & RootToken)79 int getIndentOffset(const FormatToken &RootToken) { 80 if (Style.Language == FormatStyle::LK_Java || 81 Style.Language == FormatStyle::LK_JavaScript) 82 return 0; 83 if (RootToken.isAccessSpecifier(false) || 84 RootToken.isObjCAccessSpecifier() || RootToken.is(Keywords.kw_signals)) 85 return Style.AccessModifierOffset; 86 return 0; 87 } 88 89 /// \brief Add a new line and the required indent before the first Token 90 /// of the \c UnwrappedLine if there was no structural parsing error. 91 void formatFirstToken(FormatToken &RootToken, 92 const AnnotatedLine *PreviousLine, unsigned IndentLevel, 93 unsigned Indent, bool InPPDirective); 94 95 /// \brief Get the indent of \p Level from \p IndentForLevel. 96 /// 97 /// \p IndentForLevel must contain the indent for the level \c l 98 /// at \p IndentForLevel[l], or a value < 0 if the indent for 99 /// that level is unknown. 100 unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level); 101 102 void join(AnnotatedLine &A, const AnnotatedLine &B); 103 getColumnLimit(bool InPPDirective)104 unsigned getColumnLimit(bool InPPDirective) const { 105 // In preprocessor directives reserve two chars for trailing " \" 106 return Style.ColumnLimit - (InPPDirective ? 2 : 0); 107 } 108 109 struct CompareLineStatePointers { operatorCompareLineStatePointers110 bool operator()(LineState *obj1, LineState *obj2) const { 111 return *obj1 < *obj2; 112 } 113 }; 114 115 /// \brief Analyze the entire solution space starting from \p InitialState. 116 /// 117 /// This implements a variant of Dijkstra's algorithm on the graph that spans 118 /// the solution space (\c LineStates are the nodes). The algorithm tries to 119 /// find the shortest path (the one with lowest penalty) from \p InitialState 120 /// to a state where all tokens are placed. Returns the penalty. 121 /// 122 /// If \p DryRun is \c false, directly applies the changes. 123 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false); 124 125 void reconstructPath(LineState &State, StateNode *Current); 126 127 /// \brief Add the following state to the analysis queue \c Queue. 128 /// 129 /// Assume the current state is \p PreviousNode and has been reached with a 130 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 131 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 132 bool NewLine, unsigned *Count, QueueType *Queue); 133 134 /// \brief If the \p State's next token is an r_brace closing a nested block, 135 /// format the nested block before it. 136 /// 137 /// Returns \c true if all children could be placed successfully and adapts 138 /// \p Penalty as well as \p State. If \p DryRun is false, also directly 139 /// creates changes using \c Whitespaces. 140 /// 141 /// The crucial idea here is that children always get formatted upon 142 /// encountering the closing brace right after the nested block. Now, if we 143 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is 144 /// \c false), the entire block has to be kept on the same line (which is only 145 /// possible if it fits on the line, only contains a single statement, etc. 146 /// 147 /// If \p NewLine is true, we format the nested block on separate lines, i.e. 148 /// break after the "{", format all lines with correct indentation and the put 149 /// the closing "}" on yet another new line. 150 /// 151 /// This enables us to keep the simple structure of the 152 /// \c UnwrappedLineFormatter, where we only have two options for each token: 153 /// break or don't break. 154 bool formatChildren(LineState &State, bool NewLine, bool DryRun, 155 unsigned &Penalty); 156 157 ContinuationIndenter *Indenter; 158 WhitespaceManager *Whitespaces; 159 FormatStyle Style; 160 const AdditionalKeywords &Keywords; 161 162 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 163 164 // Cache to store the penalty of formatting a vector of AnnotatedLines 165 // starting from a specific additional offset. Improves performance if there 166 // are many nested blocks. 167 std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, 168 unsigned> PenaltyCache; 169 }; 170 } // end namespace format 171 } // end namespace clang 172 173 #endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H 174