1 //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code rewrites macro invocations into their expansions.  This gives you
11 // a macro expanded file that retains comments and #includes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Rewrite/Frontend/Rewriters.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Lex/Preprocessor.h"
18 #include "clang/Rewrite/Core/Rewriter.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include <cstdio>
22 #include <memory>
23 
24 using namespace clang;
25 
26 /// isSameToken - Return true if the two specified tokens start have the same
27 /// content.
isSameToken(Token & RawTok,Token & PPTok)28 static bool isSameToken(Token &RawTok, Token &PPTok) {
29   // If two tokens have the same kind and the same identifier info, they are
30   // obviously the same.
31   if (PPTok.getKind() == RawTok.getKind() &&
32       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
33     return true;
34 
35   // Otherwise, if they are different but have the same identifier info, they
36   // are also considered to be the same.  This allows keywords and raw lexed
37   // identifiers with the same name to be treated the same.
38   if (PPTok.getIdentifierInfo() &&
39       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
40     return true;
41 
42   return false;
43 }
44 
45 
46 /// GetNextRawTok - Return the next raw token in the stream, skipping over
47 /// comments if ReturnComment is false.
GetNextRawTok(const std::vector<Token> & RawTokens,unsigned & CurTok,bool ReturnComment)48 static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
49                                   unsigned &CurTok, bool ReturnComment) {
50   assert(CurTok < RawTokens.size() && "Overran eof!");
51 
52   // If the client doesn't want comments and we have one, skip it.
53   if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
54     ++CurTok;
55 
56   return RawTokens[CurTok++];
57 }
58 
59 
60 /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
61 /// the specified vector.
LexRawTokensFromMainFile(Preprocessor & PP,std::vector<Token> & RawTokens)62 static void LexRawTokensFromMainFile(Preprocessor &PP,
63                                      std::vector<Token> &RawTokens) {
64   SourceManager &SM = PP.getSourceManager();
65 
66   // Create a lexer to lex all the tokens of the main file in raw mode.  Even
67   // though it is in raw mode, it will not return comments.
68   const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
69   Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
70 
71   // Switch on comment lexing because we really do want them.
72   RawLex.SetCommentRetentionState(true);
73 
74   Token RawTok;
75   do {
76     RawLex.LexFromRawLexer(RawTok);
77 
78     // If we have an identifier with no identifier info for our raw token, look
79     // up the indentifier info.  This is important for equality comparison of
80     // identifier tokens.
81     if (RawTok.is(tok::raw_identifier))
82       PP.LookUpIdentifierInfo(RawTok);
83 
84     RawTokens.push_back(RawTok);
85   } while (RawTok.isNot(tok::eof));
86 }
87 
88 
89 /// RewriteMacrosInInput - Implement -rewrite-macros mode.
RewriteMacrosInInput(Preprocessor & PP,raw_ostream * OS)90 void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
91   SourceManager &SM = PP.getSourceManager();
92 
93   Rewriter Rewrite;
94   Rewrite.setSourceMgr(SM, PP.getLangOpts());
95   RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
96 
97   std::vector<Token> RawTokens;
98   LexRawTokensFromMainFile(PP, RawTokens);
99   unsigned CurRawTok = 0;
100   Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
101 
102 
103   // Get the first preprocessing token.
104   PP.EnterMainSourceFile();
105   Token PPTok;
106   PP.Lex(PPTok);
107 
108   // Preprocess the input file in parallel with raw lexing the main file. Ignore
109   // all tokens that are preprocessed from a file other than the main file (e.g.
110   // a header).  If we see tokens that are in the preprocessed file but not the
111   // lexed file, we have a macro expansion.  If we see tokens in the lexed file
112   // that aren't in the preprocessed view, we have macros that expand to no
113   // tokens, or macro arguments etc.
114   while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
115     SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
116 
117     // If PPTok is from a different source file, ignore it.
118     if (!SM.isWrittenInMainFile(PPLoc)) {
119       PP.Lex(PPTok);
120       continue;
121     }
122 
123     // If the raw file hits a preprocessor directive, they will be extra tokens
124     // in the raw file that don't exist in the preprocsesed file.  However, we
125     // choose to preserve them in the output file and otherwise handle them
126     // specially.
127     if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
128       // If this is a #warning directive or #pragma mark (GNU extensions),
129       // comment the line out.
130       if (RawTokens[CurRawTok].is(tok::identifier)) {
131         const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
132         if (II->getName() == "warning") {
133           // Comment out #warning.
134           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
135         } else if (II->getName() == "pragma" &&
136                    RawTokens[CurRawTok+1].is(tok::identifier) &&
137                    (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
138                     "mark")) {
139           // Comment out #pragma mark.
140           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
141         }
142       }
143 
144       // Otherwise, if this is a #include or some other directive, just leave it
145       // in the file by skipping over the line.
146       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
147       while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
148         RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
149       continue;
150     }
151 
152     // Okay, both tokens are from the same file.  Get their offsets from the
153     // start of the file.
154     unsigned PPOffs = SM.getFileOffset(PPLoc);
155     unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
156 
157     // If the offsets are the same and the token kind is the same, ignore them.
158     if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
159       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
160       PP.Lex(PPTok);
161       continue;
162     }
163 
164     // If the PP token is farther along than the raw token, something was
165     // deleted.  Comment out the raw token.
166     if (RawOffs <= PPOffs) {
167       // Comment out a whole run of tokens instead of bracketing each one with
168       // comments.  Add a leading space if RawTok didn't have one.
169       bool HasSpace = RawTok.hasLeadingSpace();
170       RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
171       unsigned EndPos;
172 
173       do {
174         EndPos = RawOffs+RawTok.getLength();
175 
176         RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
177         RawOffs = SM.getFileOffset(RawTok.getLocation());
178 
179         if (RawTok.is(tok::comment)) {
180           // Skip past the comment.
181           RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
182           break;
183         }
184 
185       } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
186                (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
187 
188       RB.InsertTextBefore(EndPos, "*/");
189       continue;
190     }
191 
192     // Otherwise, there was a replacement an expansion.  Insert the new token
193     // in the output buffer.  Insert the whole run of new tokens at once to get
194     // them in the right order.
195     unsigned InsertPos = PPOffs;
196     std::string Expansion;
197     while (PPOffs < RawOffs) {
198       Expansion += ' ' + PP.getSpelling(PPTok);
199       PP.Lex(PPTok);
200       PPLoc = SM.getExpansionLoc(PPTok.getLocation());
201       PPOffs = SM.getFileOffset(PPLoc);
202     }
203     Expansion += ' ';
204     RB.InsertTextBefore(InsertPos, Expansion);
205   }
206 
207   // Get the buffer corresponding to MainFileID.  If we haven't changed it, then
208   // we are done.
209   if (const RewriteBuffer *RewriteBuf =
210       Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
211     //printf("Changed:\n");
212     *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
213   } else {
214     fprintf(stderr, "No changes\n");
215   }
216   OS->flush();
217 }
218