1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/AST/RawCommentList.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/Comment.h"
13 #include "clang/AST/CommentBriefParser.h"
14 #include "clang/AST/CommentCommandTraits.h"
15 #include "clang/AST/CommentLexer.h"
16 #include "clang/AST/CommentParser.h"
17 #include "clang/AST/CommentSema.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/STLExtras.h"
20 
21 using namespace clang;
22 
23 namespace {
24 /// Get comment kind and bool describing if it is a trailing comment.
getCommentKind(StringRef Comment,bool ParseAllComments)25 std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
26                                                         bool ParseAllComments) {
27   const size_t MinCommentLength = ParseAllComments ? 2 : 3;
28   if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
29     return std::make_pair(RawComment::RCK_Invalid, false);
30 
31   RawComment::CommentKind K;
32   if (Comment[1] == '/') {
33     if (Comment.size() < 3)
34       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
35 
36     if (Comment[2] == '/')
37       K = RawComment::RCK_BCPLSlash;
38     else if (Comment[2] == '!')
39       K = RawComment::RCK_BCPLExcl;
40     else
41       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
42   } else {
43     assert(Comment.size() >= 4);
44 
45     // Comment lexer does not understand escapes in comment markers, so pretend
46     // that this is not a comment.
47     if (Comment[1] != '*' ||
48         Comment[Comment.size() - 2] != '*' ||
49         Comment[Comment.size() - 1] != '/')
50       return std::make_pair(RawComment::RCK_Invalid, false);
51 
52     if (Comment[2] == '*')
53       K = RawComment::RCK_JavaDoc;
54     else if (Comment[2] == '!')
55       K = RawComment::RCK_Qt;
56     else
57       return std::make_pair(RawComment::RCK_OrdinaryC, false);
58   }
59   const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
60   return std::make_pair(K, TrailingComment);
61 }
62 
mergedCommentIsTrailingComment(StringRef Comment)63 bool mergedCommentIsTrailingComment(StringRef Comment) {
64   return (Comment.size() > 3) && (Comment[3] == '<');
65 }
66 
67 /// Returns true if R1 and R2 both have valid locations that start on the same
68 /// column.
commentsStartOnSameColumn(const SourceManager & SM,const RawComment & R1,const RawComment & R2)69 bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
70                                const RawComment &R2) {
71   SourceLocation L1 = R1.getLocStart();
72   SourceLocation L2 = R2.getLocStart();
73   bool Invalid = false;
74   unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
75   if (!Invalid) {
76     unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
77     return !Invalid && (C1 == C2);
78   }
79   return false;
80 }
81 } // unnamed namespace
82 
83 /// \brief Determines whether there is only whitespace in `Buffer` between `P`
84 /// and the previous line.
85 /// \param Buffer The buffer to search in.
86 /// \param P The offset from the beginning of `Buffer` to start from.
87 /// \return true if all of the characters in `Buffer` ranging from the closest
88 /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
89 /// are whitespace.
onlyWhitespaceOnLineBefore(const char * Buffer,unsigned P)90 static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
91   // Search backwards until we see linefeed or carriage return.
92   for (unsigned I = P; I != 0; --I) {
93     char C = Buffer[I - 1];
94     if (isVerticalWhitespace(C))
95       return true;
96     if (!isHorizontalWhitespace(C))
97       return false;
98   }
99   // We hit the beginning of the buffer.
100   return true;
101 }
102 
103 /// Returns whether `K` is an ordinary comment kind.
isOrdinaryKind(RawComment::CommentKind K)104 static bool isOrdinaryKind(RawComment::CommentKind K) {
105   return (K == RawComment::RCK_OrdinaryBCPL) ||
106          (K == RawComment::RCK_OrdinaryC);
107 }
108 
RawComment(const SourceManager & SourceMgr,SourceRange SR,bool Merged,bool ParseAllComments)109 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
110                        bool Merged, bool ParseAllComments) :
111     Range(SR), RawTextValid(false), BriefTextValid(false),
112     IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
113     ParseAllComments(ParseAllComments) {
114   // Extract raw comment text, if possible.
115   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
116     Kind = RCK_Invalid;
117     return;
118   }
119 
120   // Guess comment kind.
121   std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
122 
123   // Guess whether an ordinary comment is trailing.
124   if (ParseAllComments && isOrdinaryKind(K.first)) {
125     FileID BeginFileID;
126     unsigned BeginOffset;
127     std::tie(BeginFileID, BeginOffset) =
128         SourceMgr.getDecomposedLoc(Range.getBegin());
129     if (BeginOffset != 0) {
130       bool Invalid = false;
131       const char *Buffer =
132           SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133       IsTrailingComment |=
134           (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135     }
136   }
137 
138   if (!Merged) {
139     Kind = K.first;
140     IsTrailingComment |= K.second;
141 
142     IsAlmostTrailingComment = RawText.startswith("//<") ||
143                                  RawText.startswith("/*<");
144   } else {
145     Kind = RCK_Merged;
146     IsTrailingComment =
147         IsTrailingComment || mergedCommentIsTrailingComment(RawText);
148   }
149 }
150 
getRawTextSlow(const SourceManager & SourceMgr) const151 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152   FileID BeginFileID;
153   FileID EndFileID;
154   unsigned BeginOffset;
155   unsigned EndOffset;
156 
157   std::tie(BeginFileID, BeginOffset) =
158       SourceMgr.getDecomposedLoc(Range.getBegin());
159   std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160 
161   const unsigned Length = EndOffset - BeginOffset;
162   if (Length < 2)
163     return StringRef();
164 
165   // The comment can't begin in one file and end in another.
166   assert(BeginFileID == EndFileID);
167 
168   bool Invalid = false;
169   const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170                                                     &Invalid).data();
171   if (Invalid)
172     return StringRef();
173 
174   return StringRef(BufferStart + BeginOffset, Length);
175 }
176 
extractBriefText(const ASTContext & Context) const177 const char *RawComment::extractBriefText(const ASTContext &Context) const {
178   // Make sure that RawText is valid.
179   getRawText(Context.getSourceManager());
180 
181   // Since we will be copying the resulting text, all allocations made during
182   // parsing are garbage after resulting string is formed.  Thus we can use
183   // a separate allocator for all temporary stuff.
184   llvm::BumpPtrAllocator Allocator;
185 
186   comments::Lexer L(Allocator, Context.getDiagnostics(),
187                     Context.getCommentCommandTraits(),
188                     Range.getBegin(),
189                     RawText.begin(), RawText.end());
190   comments::BriefParser P(L, Context.getCommentCommandTraits());
191 
192   const std::string Result = P.Parse();
193   const unsigned BriefTextLength = Result.size();
194   char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195   memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196   BriefText = BriefTextPtr;
197   BriefTextValid = true;
198 
199   return BriefTextPtr;
200 }
201 
parse(const ASTContext & Context,const Preprocessor * PP,const Decl * D) const202 comments::FullComment *RawComment::parse(const ASTContext &Context,
203                                          const Preprocessor *PP,
204                                          const Decl *D) const {
205   // Make sure that RawText is valid.
206   getRawText(Context.getSourceManager());
207 
208   comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209                     Context.getCommentCommandTraits(),
210                     getSourceRange().getBegin(),
211                     RawText.begin(), RawText.end());
212   comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213                    Context.getDiagnostics(),
214                    Context.getCommentCommandTraits(),
215                    PP);
216   S.setDecl(D);
217   comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218                      Context.getDiagnostics(),
219                      Context.getCommentCommandTraits());
220 
221   return P.parseFullComment();
222 }
223 
onlyWhitespaceBetween(SourceManager & SM,SourceLocation Loc1,SourceLocation Loc2,unsigned MaxNewlinesAllowed)224 static bool onlyWhitespaceBetween(SourceManager &SM,
225                                   SourceLocation Loc1, SourceLocation Loc2,
226                                   unsigned MaxNewlinesAllowed) {
227   std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228   std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229 
230   // Question does not make sense if locations are in different files.
231   if (Loc1Info.first != Loc2Info.first)
232     return false;
233 
234   bool Invalid = false;
235   const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236   if (Invalid)
237     return false;
238 
239   unsigned NumNewlines = 0;
240   assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241   // Look for non-whitespace characters and remember any newlines seen.
242   for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243     switch (Buffer[I]) {
244     default:
245       return false;
246     case ' ':
247     case '\t':
248     case '\f':
249     case '\v':
250       break;
251     case '\r':
252     case '\n':
253       ++NumNewlines;
254 
255       // Check if we have found more than the maximum allowed number of
256       // newlines.
257       if (NumNewlines > MaxNewlinesAllowed)
258         return false;
259 
260       // Collapse \r\n and \n\r into a single newline.
261       if (I + 1 != Loc2Info.second &&
262           (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
263           Buffer[I] != Buffer[I + 1])
264         ++I;
265       break;
266     }
267   }
268 
269   return true;
270 }
271 
addComment(const RawComment & RC,llvm::BumpPtrAllocator & Allocator)272 void RawCommentList::addComment(const RawComment &RC,
273                                 llvm::BumpPtrAllocator &Allocator) {
274   if (RC.isInvalid())
275     return;
276 
277   // Check if the comments are not in source order.
278   while (!Comments.empty() &&
279          !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
280                                               RC.getLocStart())) {
281     // If they are, just pop a few last comments that don't fit.
282     // This happens if an \#include directive contains comments.
283     Comments.pop_back();
284   }
285 
286   // Ordinary comments are not interesting for us.
287   if (RC.isOrdinary())
288     return;
289 
290   // If this is the first Doxygen comment, save it (because there isn't
291   // anything to merge it with).
292   if (Comments.empty()) {
293     Comments.push_back(new (Allocator) RawComment(RC));
294     return;
295   }
296 
297   const RawComment &C1 = *Comments.back();
298   const RawComment &C2 = RC;
299 
300   // Merge comments only if there is only whitespace between them.
301   // Can't merge trailing and non-trailing comments unless the second is
302   // non-trailing ordinary in the same column, as in the case:
303   //   int x; // documents x
304   //          // more text
305   // versus:
306   //   int x; // documents x
307   //   int y; // documents y
308   // or:
309   //   int x; // documents x
310   //   // documents y
311   //   int y;
312   // Merge comments if they are on same or consecutive lines.
313   if ((C1.isTrailingComment() == C2.isTrailingComment() ||
314        (C1.isTrailingComment() && !C2.isTrailingComment() &&
315         isOrdinaryKind(C2.getKind()) &&
316         commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
317       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
318                             /*MaxNewlinesAllowed=*/1)) {
319     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
320     *Comments.back() = RawComment(SourceMgr, MergedRange, true,
321                                   RC.isParseAllComments());
322   } else {
323     Comments.push_back(new (Allocator) RawComment(RC));
324   }
325 }
326 
addDeserializedComments(ArrayRef<RawComment * > DeserializedComments)327 void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328   std::vector<RawComment *> MergedComments;
329   MergedComments.reserve(Comments.size() + DeserializedComments.size());
330 
331   std::merge(Comments.begin(), Comments.end(),
332              DeserializedComments.begin(), DeserializedComments.end(),
333              std::back_inserter(MergedComments),
334              BeforeThanCompare<RawComment>(SourceMgr));
335   std::swap(Comments, MergedComments);
336 }
337 
338